The package rpms/ceph.git has added or updated architecture-specific content in its
spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s):
https://src.fedoraproject.org/cgit/rpms/ceph.git/commit/?id=4bd118debcd4a...
https://src.fedoraproject.org/cgit/rpms/ceph.git/commit/?id=78e076b5afe9c....
Change:
+%ifarch s390
-%ifarch x86_64
Thanks.
Full change:
============
commit e06c944dbd1dd5f45f16beb5932a0e7a73b41da4
Author: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
Date: Thu May 6 07:17:45 2021 -0400
16.2.2 GA
Signed-off-by: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
diff --git a/sources b/sources
index f99318a..adc6360 100644
--- a/sources
+++ b/sources
@@ -1,5 +1 @@
-<<<<<<< HEAD
-SHA512 (ceph-16.2.1.tar.gz) =
42e404aa41565485dc4c08b66efb3de719adee72cfc535586992e15ab4601bfbd909ef61abf412badbbfa7ef13bf91a7c0576fbbf3f9f687bc7b465b21a8c416
-=======
SHA512 (ceph-16.2.2.tar.gz) =
4269fe07821400655c5cfcc2828b4a39a0adc81ef0045d86b50b539c13c151c9ace9710fe365bbf2de7e5134e03c45c88f19f4adcb7c4d608eab4b9580da113c
->>>>>>> origin/rawhide
commit 145268fb052ec4b1f5a725cef8ce6666b0a72875
Merge: 84c3ccc 4bd118d
Author: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
Date: Thu May 6 07:16:36 2021 -0400
16.2.2 GA
Signed-off-by: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
diff --cc sources
index 3de5593,adc6360..f99318a
--- a/sources
+++ b/sources
@@@ -1,1 -1,1 +1,5 @@@
++<<<<<<< HEAD
+SHA512 (ceph-16.2.1.tar.gz) =
42e404aa41565485dc4c08b66efb3de719adee72cfc535586992e15ab4601bfbd909ef61abf412badbbfa7ef13bf91a7c0576fbbf3f9f687bc7b465b21a8c416
++=======
+ SHA512 (ceph-16.2.2.tar.gz) =
4269fe07821400655c5cfcc2828b4a39a0adc81ef0045d86b50b539c13c151c9ace9710fe365bbf2de7e5134e03c45c88f19f4adcb7c4d608eab4b9580da113c
++>>>>>>> origin/rawhide
commit 4bd118debcd4aac68d8ce9472b887723546ef7b5
Author: Kaleb S. KEITHLEY <kkeithle(a)f34node1.kkeithle.usersys.redhat.com>
Date: Wed May 5 17:06:40 2021 -0400
16.2.2 GA
Signed-off-by: Kaleb S. KEITHLEY <kkeithle(a)f34node1.kkeithle.usersys.redhat.com>
diff --git a/0004-src-CMakeLists.txt.patch b/0004-src-CMakeLists.txt.patch
deleted file mode 100644
index f3f2e36..0000000
--- a/0004-src-CMakeLists.txt.patch
+++ /dev/null
@@ -1,12 +0,0 @@
---- ceph-16.0.0/src/CMakeLists.txt.orig 2021-01-25 13:45:15.316053258 -0500
-+++ ceph-16.0.0/src/CMakeLists.txt 2021-01-25 13:43:34.418305591 -0500
-@@ -29,7 +29,8 @@
- -D_THREAD_SAFE
- -D__STDC_FORMAT_MACROS
- -D_FILE_OFFSET_BITS=64
-- -DBOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION)
-+ -DBOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION
-+ -DBOOST_ASIO_USE_TS_EXECUTOR_AS_DEFAULT)
- if(LINUX)
- add_definitions("-D_GNU_SOURCE")
- endif()
diff --git a/0012-rgw.patch b/0012-rgw.patch
deleted file mode 100644
index 0d85501..0000000
--- a/0012-rgw.patch
+++ /dev/null
@@ -1,7721 +0,0 @@
-From 483302af2622cb26983c847196b8bad0a80fbd2f Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 17:04:12 -0500
-Subject: [PATCH 01/26] cls/log: Take const references of things you won't
- modify
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 73ea8cec06addc6af2ba354321f1099f657f13c5)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/cls/log/cls_log_client.cc | 4 ++--
- src/cls/log/cls_log_client.h | 6 +++---
- 2 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/src/cls/log/cls_log_client.cc b/src/cls/log/cls_log_client.cc
-index 418599c8066e4..182bb9fec47e9 100644
---- a/src/cls/log/cls_log_client.cc
-+++ b/src/cls/log/cls_log_client.cc
-@@ -113,8 +113,8 @@ class LogListCtx : public ObjectOperationCompletion {
- }
- };
-
--void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t&
to,
-- const string& in_marker, int max_entries,
-+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
-+ const utime_t& to, const string& in_marker, int max_entries,
- list<cls_log_entry>& entries,
- string *out_marker, bool *truncated)
- {
-diff --git a/src/cls/log/cls_log_client.h b/src/cls/log/cls_log_client.h
-index b049c2cc01bda..2afdabeb3e0a2 100644
---- a/src/cls/log/cls_log_client.h
-+++ b/src/cls/log/cls_log_client.h
-@@ -19,9 +19,9 @@ void cls_log_add(librados::ObjectWriteOperation& op,
cls_log_entry& entry);
- void cls_log_add(librados::ObjectWriteOperation& op, const utime_t& timestamp,
- const std::string& section, const std::string& name,
ceph::buffer::list& bl);
-
--void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t&
to,
-- const std::string& in_marker, int max_entries,
-- std::list<cls_log_entry>& entries,
-+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
-+ const utime_t& to, const std::string& in_marker,
-+ int max_entries, std::list<cls_log_entry>& entries,
- std::string *out_marker, bool *truncated);
-
- void cls_log_trim(librados::ObjectWriteOperation& op, const utime_t& from_time,
const utime_t& to_time,
-
-From 35f044f39da713b3bf4c5002aade7b456727190e Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 3 Nov 2020 16:02:26 -0500
-Subject: [PATCH 02/26] rgw: Add AioCompletion* versions for the rest of the
- FIFO methods
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 665573ab8905bfa2e1ede6fc3be9bc80a625cb49)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/cls_fifo_legacy.cc | 1583 +++++++++++++++++++++-----
- src/rgw/cls_fifo_legacy.h | 91 +-
- src/rgw/rgw_datalog.cc | 7 +-
- src/test/rgw/test_cls_fifo_legacy.cc | 484 +++++++-
- 4 files changed, 1826 insertions(+), 339 deletions(-)
-
-diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
-index d835aeec76ab8..569a3e77c458f 100644
---- a/src/rgw/cls_fifo_legacy.cc
-+++ b/src/rgw/cls_fifo_legacy.cc
-@@ -109,6 +109,7 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
- return r;
- };
-
-+namespace {
- void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
- const fifo::update& update)
- {
-@@ -175,6 +176,27 @@ int push_part(lr::IoCtx& ioctx, const std::string& oid,
std::string_view tag,
- return retval;
- }
-
-+void push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
-+ std::deque<cb::list> data_bufs, std::uint64_t tid,
-+ lr::AioCompletion* c)
-+{
-+ lr::ObjectWriteOperation op;
-+ fifo::op::push_part pp;
-+
-+ pp.tag = tag;
-+ pp.data_bufs = data_bufs;
-+ pp.total_len = 0;
-+
-+ for (const auto& bl : data_bufs)
-+ pp.total_len += bl.length();
-+
-+ cb::list in;
-+ encode(pp, in);
-+ op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in);
-+ auto r = ioctx.aio_operate(oid, c, &op, lr::OPERATION_RETURNVEC);
-+ ceph_assert(r >= 0);
-+}
-+
- void trim_part(lr::ObjectWriteOperation* op,
- std::optional<std::string_view> tag,
- std::uint64_t ofs, bool exclusive)
-@@ -232,6 +254,70 @@ int list_part(lr::IoCtx& ioctx, const std::string& oid,
- return r;
- }
-
-+struct list_entry_completion : public lr::ObjectOperationCompletion {
-+ CephContext* cct;
-+ int* r_out;
-+ std::vector<fifo::part_list_entry>* entries;
-+ bool* more;
-+ bool* full_part;
-+ std::string* ptag;
-+ std::uint64_t tid;
-+
-+ list_entry_completion(CephContext* cct, int* r_out,
std::vector<fifo::part_list_entry>* entries,
-+ bool* more, bool* full_part, std::string* ptag,
-+ std::uint64_t tid)
-+ : cct(cct), r_out(r_out), entries(entries), more(more),
-+ full_part(full_part), ptag(ptag), tid(tid) {}
-+ virtual ~list_entry_completion() = default;
-+ void handle_completion(int r, bufferlist& bl) override {
-+ if (r >= 0) try {
-+ fifo::op::list_part_reply reply;
-+ auto iter = bl.cbegin();
-+ decode(reply, iter);
-+ if (entries) *entries = std::move(reply.entries);
-+ if (more) *more = reply.more;
-+ if (full_part) *full_part = reply.full_part;
-+ if (ptag) *ptag = reply.tag;
-+ } catch (const cb::error& err) {
-+ lderr(cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " decode failed: " << err.what()
-+ << " tid=" << tid << dendl;
-+ r = from_error_code(err.code());
-+ } else if (r < 0) {
-+ lderr(cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " fifo::op::LIST_PART failed r=" << r << "
tid=" << tid
-+ << dendl;
-+ }
-+ if (r_out) *r_out = r;
-+ }
-+};
-+
-+lr::ObjectReadOperation list_part(CephContext* cct,
-+ std::optional<std::string_view> tag,
-+ std::uint64_t ofs,
-+ std::uint64_t max_entries,
-+ int* r_out,
-+ std::vector<fifo::part_list_entry>* entries,
-+ bool* more, bool* full_part,
-+ std::string* ptag, std::uint64_t tid)
-+{
-+ lr::ObjectReadOperation op;
-+ fifo::op::list_part lp;
-+
-+ lp.tag = tag;
-+ lp.ofs = ofs;
-+ lp.max_entries = max_entries;
-+
-+ cb::list in;
-+ encode(lp, in);
-+ op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in,
-+ new list_entry_completion(cct, r_out, entries, more, full_part,
-+ ptag, tid));
-+ return op;
-+}
-+
- int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
- fifo::part_header* header,
- std::uint64_t tid, optional_yield y)
-@@ -264,29 +350,131 @@ int get_part_info(lr::IoCtx& ioctx, const std::string&
oid,
- return r;
- }
-
--static void complete(lr::AioCompletion* c_, int r)
-+struct partinfo_completion : public lr::ObjectOperationCompletion {
-+ CephContext* cct;
-+ int* rp;
-+ fifo::part_header* h;
-+ std::uint64_t tid;
-+ partinfo_completion(CephContext* cct, int* rp, fifo::part_header* h,
-+ std::uint64_t tid) :
-+ cct(cct), rp(rp), h(h), tid(tid) {
-+ }
-+ virtual ~partinfo_completion() = default;
-+ void handle_completion(int r, bufferlist& bl) override {
-+ if (r >= 0) try {
-+ fifo::op::get_part_info_reply reply;
-+ auto iter = bl.cbegin();
-+ decode(reply, iter);
-+ if (h) *h = std::move(reply.header);
-+ } catch (const cb::error& err) {
-+ r = from_error_code(err.code());
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " decode failed: " << err.what()
-+ << " tid=" << tid << dendl;
-+ } else {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " fifo::op::GET_PART_INFO failed r=" << r << "
tid=" << tid
-+ << dendl;
-+ }
-+ if (rp) {
-+ *rp = r;
-+ }
-+ }
-+};
-+
-+template<typename T>
-+struct Completion {
-+private:
-+ lr::AioCompletion* _cur = nullptr;
-+ lr::AioCompletion* _super;
-+public:
-+
-+ using Ptr = std::unique_ptr<T>;
-+
-+ lr::AioCompletion* cur() const {
-+ return _cur;
-+ }
-+ lr::AioCompletion* super() const {
-+ return _super;
-+ }
-+
-+ Completion(lr::AioCompletion* super) : _super(super) {
-+ super->pc->get();
-+ }
-+
-+ ~Completion() {
-+ if (_super) {
-+ _super->pc->put();
-+ }
-+ if (_cur)
-+ _cur->release();
-+ _super = nullptr;
-+ _cur = nullptr;
-+ }
-+
-+ // The only times that aio_operate can return an error are:
-+ // 1. The completion contains a null pointer. This should just
-+ // crash, and in our case it does.
-+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
-+ // snapshots, so we don't care.
-+ //
-+ // So we will just assert that initiating an Aio operation succeeds
-+ // and not worry about recovering.
-+ static lr::AioCompletion* call(Ptr&& p) {
-+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
-+ &cb);
-+ auto c = p->_cur;
-+ p.release();
-+ return c;
-+ }
-+ static void complete(Ptr&& p, int r) {
-+ auto c = p->_super->pc;
-+ p->_super = nullptr;
-+ c->lock.lock();
-+ c->rval = r;
-+ c->complete = true;
-+ c->lock.unlock();
-+
-+ auto cb_complete = c->callback_complete;
-+ auto cb_complete_arg = c->callback_complete_arg;
-+ if (cb_complete)
-+ cb_complete(c, cb_complete_arg);
-+
-+ auto cb_safe = c->callback_safe;
-+ auto cb_safe_arg = c->callback_safe_arg;
-+ if (cb_safe)
-+ cb_safe(c, cb_safe_arg);
-+
-+ c->lock.lock();
-+ c->callback_complete = nullptr;
-+ c->callback_safe = nullptr;
-+ c->cond.notify_all();
-+ c->put_unlock();
-+ }
-+
-+ static void cb(lr::completion_t, void* arg) {
-+ auto t = static_cast<T*>(arg);
-+ auto r = t->_cur->get_return_value();
-+ t->_cur->release();
-+ t->_cur = nullptr;
-+ t->handle(Ptr(t), r);
-+ }
-+};
-+
-+lr::ObjectReadOperation get_part_info(CephContext* cct,
-+ fifo::part_header* header,
-+ std::uint64_t tid, int* r = 0)
- {
-- auto c = c_->pc;
-- c->lock.lock();
-- c->rval = r;
-- c->complete = true;
-- c->lock.unlock();
--
-- auto cb_complete = c->callback_complete;
-- auto cb_complete_arg = c->callback_complete_arg;
-- if (cb_complete)
-- cb_complete(c, cb_complete_arg);
--
-- auto cb_safe = c->callback_safe;
-- auto cb_safe_arg = c->callback_safe_arg;
-- if (cb_safe)
-- cb_safe(c, cb_safe_arg);
--
-- c->lock.lock();
-- c->callback_complete = NULL;
-- c->callback_safe = NULL;
-- c->cond.notify_all();
-- c->put_unlock();
-+ lr::ObjectReadOperation op;
-+ fifo::op::get_part_info gpi;
-+
-+ cb::list in;
-+ cb::list bl;
-+ encode(gpi, in);
-+ op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
-+ new partinfo_completion(cct, r, header, tid));
-+ return op;
-+}
- }
-
- std::optional<marker> FIFO::to_marker(std::string_view s)
-@@ -385,11 +573,8 @@ int FIFO::_update_meta(const fifo::update& update,
- return r;
- }
-
--struct Updater {
-+struct Updater : public Completion<Updater> {
- FIFO* fifo;
-- lr::AioCompletion* super;
-- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
-- static_cast<void*>(this), &FIFO::update_callback);
- fifo::update update;
- fifo::objv version;
- bool reread = false;
-@@ -398,92 +583,74 @@ struct Updater {
- Updater(FIFO* fifo, lr::AioCompletion* super,
- const fifo::update& update, fifo::objv version,
- bool* pcanceled, std::uint64_t tid)
-- : fifo(fifo), super(super), update(update), version(version),
-- pcanceled(pcanceled), tid(tid) {
-- super->pc->get();
-- }
-- ~Updater() {
-- cur->release();
-- }
--};
--
--void FIFO::update_callback(lr::completion_t, void* arg)
--{
-- std::unique_ptr<Updater> updater(static_cast<Updater*>(arg));
-- auto cct = updater->fifo->cct;
-- auto tid = updater->tid;
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " entering: tid=" << tid << dendl;
-- if (!updater->reread) {
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-- << " handling async update_meta: tid="
-- << tid << dendl;
-- int r = updater->cur->get_return_value();
-+ : Completion(super), fifo(fifo), update(update), version(version),
-+ pcanceled(pcanceled) {}
-+
-+ void handle(Ptr&& p, int r) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ if (reread)
-+ handle_reread(std::move(p), r);
-+ else
-+ handle_update(std::move(p), r);
-+ }
-+
-+ void handle_update(Ptr&& p, int r) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " handling async update_meta: tid="
-+ << tid << dendl;
- if (r < 0 && r != -ECANCELED) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
- << " update failed: r=" << r << " tid="
<< tid << dendl;
-- complete(updater->super, r);
-+ complete(std::move(p), r);
- return;
- }
- bool canceled = (r == -ECANCELED);
- if (!canceled) {
-- int r = updater->fifo->apply_update(&updater->fifo->info,
-- updater->version,
-- updater->update, tid);
-+ int r = fifo->apply_update(&fifo->info, version, update, tid);
- if (r < 0) {
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " update failed, marking canceled: r=" << r <<
" tid="
-- << tid << dendl;
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " update failed, marking canceled: r=" << r
-+ << " tid=" << tid << dendl;
- canceled = true;
- }
- }
- if (canceled) {
-- updater->cur->release();
-- updater->cur = lr::Rados::aio_create_completion(
-- arg, &FIFO::update_callback);
-- updater->reread = true;
-- auto r = updater->fifo->read_meta(tid, updater->cur);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed dispatching read_meta: r=" << r << "
tid="
-- << tid << dendl;
-- complete(updater->super, r);
-- } else {
-- updater.release();
-- }
-+ reread = true;
-+ fifo->read_meta(tid, call(std::move(p)));
- return;
- }
-- if (updater->pcanceled)
-- *updater->pcanceled = false;
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-- << " completing: tid=" << tid << dendl;
-- complete(updater->super, 0);
-- return;
-- }
--
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " handling async read_meta: tid="
-- << tid << dendl;
-- int r = updater->cur->get_return_value();
-- if (r < 0 && updater->pcanceled) {
-- *updater->pcanceled = false;
-- } else if (r >= 0 && updater->pcanceled) {
-- *updater->pcanceled = true;
-- }
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed dispatching read_meta: r=" << r <<
" tid="
-- << tid << dendl;
-- } else {
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-- << " completing: tid=" << tid << dendl;
-+ if (pcanceled)
-+ *pcanceled = false;
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " completing: tid=" << tid << dendl;
-+ complete(std::move(p), 0);
-+ }
-+
-+ void handle_reread(Ptr&& p, int r) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " handling async read_meta: tid="
-+ << tid << dendl;
-+ if (r < 0 && pcanceled) {
-+ *pcanceled = false;
-+ } else if (r >= 0 && pcanceled) {
-+ *pcanceled = true;
-+ }
-+ if (r < 0) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " failed dispatching read_meta: r=" << r <<
" tid="
-+ << tid << dendl;
-+ } else {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " completing: tid=" << tid << dendl;
-+ }
-+ complete(std::move(p), r);
- }
-- complete(updater->super, r);
--}
-+};
-
--int FIFO::_update_meta(const fifo::update& update,
-- fifo::objv version, bool* pcanceled,
-- std::uint64_t tid, lr::AioCompletion* c)
-+void FIFO::_update_meta(const fifo::update& update,
-+ fifo::objv version, bool* pcanceled,
-+ std::uint64_t tid, lr::AioCompletion* c)
- {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
-@@ -491,15 +658,8 @@ int FIFO::_update_meta(const fifo::update& update,
- update_meta(&op, info.version, update);
- auto updater = std::make_unique<Updater>(this, c, update, version, pcanceled,
- tid);
-- auto r = ioctx.aio_operate(oid, updater->cur, &op);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed dispatching update_meta: r=" << r <<
" tid="
-- << tid << dendl;
-- } else {
-- updater.release();
-- }
-- return r;
-+ auto r = ioctx.aio_operate(oid, Updater::call(std::move(updater)), &op);
-+ assert(r >= 0);
- }
-
- int FIFO::create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
-@@ -509,7 +669,7 @@ int FIFO::create_part(int64_t part_num, std::string_view tag,
std::uint64_t tid,
- << " entering: tid=" << tid << dendl;
- lr::ObjectWriteOperation op;
- op.create(false); /* We don't need exclusivity, part_init ensures
-- we're creating from the same journal entry. */
-+ we're creating from the same journal entry. */
- std::unique_lock l(m);
- part_init(&op, tag, info.params);
- auto oid = info.part_oid(part_num);
-@@ -806,6 +966,209 @@ int FIFO::_prepare_new_head(std::uint64_t tid, optional_yield y)
- return 0;
- }
-
-+struct NewPartPreparer : public Completion<NewPartPreparer> {
-+ FIFO* f;
-+ std::vector<fifo::journal_entry> jentries;
-+ int i = 0;
-+ std::int64_t new_head_part_num;
-+ bool canceled = false;
-+ uint64_t tid;
-+
-+ NewPartPreparer(FIFO* f, lr::AioCompletion* super,
-+ std::vector<fifo::journal_entry> jentries,
-+ std::int64_t new_head_part_num,
-+ std::uint64_t tid)
-+ : Completion(super), f(f), jentries(std::move(jentries)),
-+ new_head_part_num(new_head_part_num), tid(tid) {}
-+
-+ void handle(Ptr&& p, int r) {
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ if (r < 0) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " _update_meta failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ if (canceled) {
-+ std::unique_lock l(f->m);
-+ auto iter = f->info.journal.find(jentries.front().part_num);
-+ auto max_push_part_num = f->info.max_push_part_num;
-+ auto head_part_num = f->info.head_part_num;
-+ auto version = f->info.version;
-+ auto found = (iter != f->info.journal.end());
-+ l.unlock();
-+ if ((max_push_part_num >= jentries.front().part_num &&
-+ head_part_num >= new_head_part_num)) {
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " raced, but journaled and processed: i=" << i
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+ if (i >= MAX_RACE_RETRIES) {
-+ complete(std::move(p), -ECANCELED);
-+ return;
-+ }
-+ if (!found) {
-+ ++i;
-+ f->_update_meta(fifo::update{}
-+ .journal_entries_add(jentries),
-+ version, &canceled, tid, call(std::move(p)));
-+ return;
-+ } else {
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " raced, journaled but not processed: i=" << i
-+ << " tid=" << tid << dendl;
-+ canceled = false;
-+ }
-+ // Fall through. We still need to process the journal.
-+ }
-+ f->process_journal(tid, super());
-+ return;
-+ }
-+};
-+
-+void FIFO::_prepare_new_part(bool is_head, std::uint64_t tid,
-+ lr::AioCompletion* c)
-+{
-+ std::unique_lock l(m);
-+ std::vector jentries = { info.next_journal_entry(generate_tag()) };
-+ if (info.journal.find(jentries.front().part_num) != info.journal.end()) {
-+ l.unlock();
-+ ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " new part journaled, but not processed: tid="
-+ << tid << dendl;
-+ process_journal(tid, c);
-+ return;
-+ }
-+ std::int64_t new_head_part_num = info.head_part_num;
-+ auto version = info.version;
-+
-+ if (is_head) {
-+ auto new_head_jentry = jentries.front();
-+ new_head_jentry.op = fifo::journal_entry::Op::set_head;
-+ new_head_part_num = jentries.front().part_num;
-+ jentries.push_back(std::move(new_head_jentry));
-+ }
-+ l.unlock();
-+
-+ auto n = std::make_unique<NewPartPreparer>(this, c, jentries,
-+ new_head_part_num, tid);
-+ auto np = n.get();
-+ _update_meta(fifo::update{}.journal_entries_add(jentries), version,
-+ &np->canceled, tid, NewPartPreparer::call(std::move(n)));
-+}
-+
-+struct NewHeadPreparer : public Completion<NewHeadPreparer> {
-+ FIFO* f;
-+ int i = 0;
-+ bool newpart;
-+ std::int64_t new_head_num;
-+ bool canceled = false;
-+ std::uint64_t tid;
-+
-+ NewHeadPreparer(FIFO* f, lr::AioCompletion* super,
-+ bool newpart, std::int64_t new_head_num, std::uint64_t tid)
-+ : Completion(super), f(f), newpart(newpart), new_head_num(new_head_num),
-+ tid(tid) {}
-+
-+ void handle(Ptr&& p, int r) {
-+ if (newpart)
-+ handle_newpart(std::move(p), r);
-+ else
-+ handle_update(std::move(p), r);
-+ }
-+
-+ void handle_newpart(Ptr&& p, int r) {
-+ if (r < 0) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " _prepare_new_part failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ std::unique_lock l(f->m);
-+ if (f->info.max_push_part_num < new_head_num) {
-+ l.unlock();
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " _prepare_new_part failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), -EIO);
-+ } else {
-+ l.unlock();
-+ complete(std::move(p), 0);
-+ }
-+ }
-+
-+ void handle_update(Ptr&& p, int r) {
-+ std::unique_lock l(f->m);
-+ auto head_part_num = f->info.head_part_num;
-+ auto version = f->info.version;
-+ l.unlock();
-+
-+ if (r < 0) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " _update_meta failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ if (canceled) {
-+ if (i >= MAX_RACE_RETRIES) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " canceled too many times, giving up: tid=" << tid
<< dendl;
-+ complete(std::move(p), -ECANCELED);
-+ return;
-+ }
-+
-+ // Raced, but there's still work to do!
-+ if (head_part_num < new_head_num) {
-+ canceled = false;
-+ ++i;
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " updating head: i=" << i << " tid="
<< tid << dendl;
-+ f->_update_meta(fifo::update{}.head_part_num(new_head_num),
-+ version, &this->canceled, tid, call(std::move(p)));
-+ return;
-+ }
-+ }
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " succeeded : i=" << i << " tid=" <<
tid << dendl;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+};
-+
-+void FIFO::_prepare_new_head(std::uint64_t tid, lr::AioCompletion* c)
-+{
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " entering: tid=" << tid << dendl;
-+ std::unique_lock l(m);
-+ int64_t new_head_num = info.head_part_num + 1;
-+ auto max_push_part_num = info.max_push_part_num;
-+ auto version = info.version;
-+ l.unlock();
-+
-+ if (max_push_part_num < new_head_num) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " need new part: tid=" << tid << dendl;
-+ auto n = std::make_unique<NewHeadPreparer>(this, c, true, new_head_num,
-+ tid);
-+ _prepare_new_part(true, tid, NewHeadPreparer::call(std::move(n)));
-+ } else {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " updating head: tid=" << tid << dendl;
-+ auto n = std::make_unique<NewHeadPreparer>(this, c, false, new_head_num,
-+ tid);
-+ auto np = n.get();
-+ _update_meta(fifo::update{}.head_part_num(new_head_num), version,
-+ &np->canceled, tid, NewHeadPreparer::call(std::move(n)));
-+ }
-+}
-+
- int FIFO::push_entries(const std::deque<cb::list>& data_bufs,
- std::uint64_t tid, optional_yield y)
- {
-@@ -825,6 +1188,18 @@ int FIFO::push_entries(const std::deque<cb::list>&
data_bufs,
- return r;
- }
-
-+void FIFO::push_entries(const std::deque<cb::list>& data_bufs,
-+ std::uint64_t tid, lr::AioCompletion* c)
-+{
-+ std::unique_lock l(m);
-+ auto head_part_num = info.head_part_num;
-+ auto tag = info.head_tag;
-+ const auto part_oid = info.part_oid(head_part_num);
-+ l.unlock();
-+
-+ push_part(ioctx, part_oid, tag, data_bufs, tid, c);
-+}
-+
- int FIFO::trim_part(int64_t part_num, uint64_t ofs,
- std::optional<std::string_view> tag,
- bool exclusive, std::uint64_t tid,
-@@ -845,10 +1220,10 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
- return 0;
- }
-
--int FIFO::trim_part(int64_t part_num, uint64_t ofs,
-- std::optional<std::string_view> tag,
-- bool exclusive, std::uint64_t tid,
-- lr::AioCompletion* c)
-+void FIFO::trim_part(int64_t part_num, uint64_t ofs,
-+ std::optional<std::string_view> tag,
-+ bool exclusive, std::uint64_t tid,
-+ lr::AioCompletion* c)
- {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
-@@ -858,12 +1233,7 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
- l.unlock();
- rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive);
- auto r = ioctx.aio_operate(part_oid, c, &op);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed scheduling trim_part: r=" << r
-- << " tid=" << tid << dendl;
-- }
-- return r;
-+ ceph_assert(r >= 0);
- }
-
- int FIFO::open(lr::IoCtx ioctx, std::string oid, std::unique_ptr<FIFO>* fifo,
-@@ -960,54 +1330,42 @@ int FIFO::read_meta(optional_yield y) {
- return read_meta(tid, y);
- }
-
--struct Reader {
-+struct Reader : public Completion<Reader> {
- FIFO* fifo;
- cb::list bl;
-- lr::AioCompletion* super;
- std::uint64_t tid;
-- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
-- static_cast<void*>(this), &FIFO::read_callback);
- Reader(FIFO* fifo, lr::AioCompletion* super, std::uint64_t tid)
-- : fifo(fifo), super(super), tid(tid) {
-- super->pc->get();
-- }
-- ~Reader() {
-- cur->release();
-- }
--};
-+ : Completion(super), fifo(fifo), tid(tid) {}
-
--void FIFO::read_callback(lr::completion_t, void* arg)
--{
-- std::unique_ptr<Reader> reader(static_cast<Reader*>(arg));
-- auto cct = reader->fifo->cct;
-- auto tid = reader->tid;
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " entering: tid=" << tid << dendl;
-- auto r = reader->cur->get_return_value();
-- if (r >= 0) try {
-- fifo::op::get_meta_reply reply;
-- auto iter = reader->bl.cbegin();
-- decode(reply, iter);
-- std::unique_lock l(reader->fifo->m);
-- if (reply.info.version.same_or_later(reader->fifo->info.version)) {
-- reader->fifo->info = std::move(reply.info);
-- reader->fifo->part_header_size = reply.part_header_size;
-- reader->fifo->part_entry_overhead = reply.part_entry_overhead;
-- }
-- } catch (const cb::error& err) {
-+ void handle(Ptr&& p, int r) {
-+ auto cct = fifo->cct;
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ if (r >= 0) try {
-+ fifo::op::get_meta_reply reply;
-+ auto iter = bl.cbegin();
-+ decode(reply, iter);
-+ std::unique_lock l(fifo->m);
-+ if (reply.info.version.same_or_later(fifo->info.version)) {
-+ fifo->info = std::move(reply.info);
-+ fifo->part_header_size = reply.part_header_size;
-+ fifo->part_entry_overhead = reply.part_entry_overhead;
-+ }
-+ } catch (const cb::error& err) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " failed to decode response err=" << err.what()
-+ << " tid=" << tid << dendl;
-+ r = from_error_code(err.code());
-+ } else {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed to decode response err=" << err.what()
-+ << " read_meta failed r=" << r
- << " tid=" << tid << dendl;
-- r = from_error_code(err.code());
-- } else {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " read_meta failed r=" << r
-- << " tid=" << tid << dendl;
-+ }
-+ complete(std::move(p), r);
- }
-- complete(reader->super, r);
--}
-+};
-
--int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
-+void FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
- {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
-@@ -1016,16 +1374,10 @@ int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
- cb::list in;
- encode(gm, in);
- auto reader = std::make_unique<Reader>(this, c, tid);
-- auto r = ioctx.aio_exec(oid, reader->cur, fifo::op::CLASS,
-- fifo::op::GET_META, in, &reader->bl);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed scheduling read_meta r=" << r
-- << " tid=" << tid << dendl;
-- } else {
-- reader.release();
-- }
-- return r;
-+ auto rp = reader.get();
-+ auto r = ioctx.aio_exec(oid, Reader::call(std::move(reader)), fifo::op::CLASS,
-+ fifo::op::GET_META, in, &rp->bl);
-+ assert(r >= 0);
- }
-
- const fifo::info& FIFO::meta() const {
-@@ -1040,6 +1392,10 @@ int FIFO::push(const cb::list& bl, optional_yield y) {
- return push(std::vector{ bl }, y);
- }
-
-+void FIFO::push(const cb::list& bl, lr::AioCompletion* c) {
-+ push(std::vector{ bl }, c);
-+}
-+
- int FIFO::push(const std::vector<cb::list>& data_bufs, optional_yield y)
- {
- std::unique_lock l(m);
-@@ -1153,24 +1509,185 @@ int FIFO::push(const std::vector<cb::list>& data_bufs,
optional_yield y)
- return 0;
- }
-
--int FIFO::list(int max_entries,
-- std::optional<std::string_view> markstr,
-- std::vector<list_entry>* presult, bool* pmore,
-- optional_yield y)
--{
-- std::unique_lock l(m);
-- auto tid = ++next_tid;
-- std::int64_t part_num = info.tail_part_num;
-- l.unlock();
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " entering: tid=" << tid << dendl;
-- std::uint64_t ofs = 0;
-- if (markstr) {
-- auto marker = to_marker(*markstr);
-- if (!marker) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " invalid marker string: " << markstr
-- << " tid= "<< tid << dendl;
-+struct Pusher : public Completion<Pusher> {
-+ FIFO* f;
-+ std::deque<cb::list> remaining;
-+ std::deque<cb::list> batch;
-+ int i = 0;
-+ std::uint64_t tid;
-+ bool new_heading = false;
-+
-+ void prep_then_push(Ptr&& p, const unsigned successes) {
-+ std::unique_lock l(f->m);
-+ auto max_part_size = f->info.params.max_part_size;
-+ auto part_entry_overhead = f->part_entry_overhead;
-+ l.unlock();
-+
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " preparing push: remaining=" << remaining.size()
-+ << " batch=" << batch.size() << " i="
<< i
-+ << " tid=" << tid << dendl;
-+
-+ uint64_t batch_len = 0;
-+ if (successes > 0) {
-+ if (successes == batch.size()) {
-+ batch.clear();
-+ } else {
-+ batch.erase(batch.begin(), batch.begin() + successes);
-+ for (const auto& b : batch) {
-+ batch_len += b.length() + part_entry_overhead;
-+ }
-+ }
-+ }
-+
-+ if (batch.empty() && remaining.empty()) {
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+
-+ while (!remaining.empty() &&
-+ (remaining.front().length() + batch_len <= max_part_size)) {
-+
-+ /* We can send entries with data_len up to max_entry_size,
-+ however, we want to also account the overhead when
-+ dealing with multiple entries. Previous check doesn't
-+ account for overhead on purpose. */
-+ batch_len += remaining.front().length() + part_entry_overhead;
-+ batch.push_back(std::move(remaining.front()));
-+ remaining.pop_front();
-+ }
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " prepared push: remaining=" << remaining.size()
-+ << " batch=" << batch.size() << " i="
<< i
-+ << " batch_len=" << batch_len
-+ << " tid=" << tid << dendl;
-+ push(std::move(p));
-+ }
-+
-+ void push(Ptr&& p) {
-+ f->push_entries(batch, tid, call(std::move(p)));
-+ }
-+
-+ void new_head(Ptr&& p) {
-+ new_heading = true;
-+ f->_prepare_new_head(tid, call(std::move(p)));
-+ }
-+
-+ void handle(Ptr&& p, int r) {
-+ if (!new_heading) {
-+ if (r == -ERANGE) {
-+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " need new head tid=" << tid << dendl;
-+ new_head(std::move(p));
-+ return;
-+ }
-+ if (r < 0) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " push_entries failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ i = 0; // We've made forward progress, so reset the race counter!
-+ prep_then_push(std::move(p), r);
-+ } else {
-+ if (r < 0) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " prepare_new_head failed: r=" << r
-+ << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ new_heading = false;
-+ handle_new_head(std::move(p), r);
-+ }
-+ }
-+
-+ void handle_new_head(Ptr&& p, int r) {
-+ if (r == -ECANCELED) {
-+ if (p->i == MAX_RACE_RETRIES) {
-+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " canceled too many times, giving up: tid=" << tid
<< dendl;
-+ complete(std::move(p), -ECANCELED);
-+ return;
-+ }
-+ ++p->i;
-+ } else if (r) {
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ if (p->batch.empty()) {
-+ prep_then_push(std::move(p), 0);
-+ return;
-+ } else {
-+ push(std::move(p));
-+ return;
-+ }
-+ }
-+
-+ Pusher(FIFO* f, std::deque<cb::list>&& remaining,
-+ std::uint64_t tid, lr::AioCompletion* super)
-+ : Completion(super), f(f), remaining(std::move(remaining)),
-+ tid(tid) {}
-+};
-+
-+void FIFO::push(const std::vector<cb::list>& data_bufs,
-+ lr::AioCompletion* c)
-+{
-+ std::unique_lock l(m);
-+ auto tid = ++next_tid;
-+ auto max_entry_size = info.params.max_entry_size;
-+ auto need_new_head = info.need_new_head();
-+ l.unlock();
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " entering: tid=" << tid << dendl;
-+ auto p = std::make_unique<Pusher>(this,
std::deque<cb::list>(data_bufs.begin(), data_bufs.end()),
-+ tid, c);
-+ // Validate sizes
-+ for (const auto& bl : data_bufs) {
-+ if (bl.length() > max_entry_size) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " entry bigger than max_entry_size tid=" << tid <<
dendl;
-+ Pusher::complete(std::move(p), -E2BIG);
-+ return;
-+ }
-+ }
-+
-+ if (data_bufs.empty() ) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " empty push, returning success tid=" << tid <<
dendl;
-+ Pusher::complete(std::move(p), 0);
-+ return;
-+ }
-+
-+ if (need_new_head) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " need new head tid=" << tid << dendl;
-+ p->new_head(std::move(p));
-+ } else {
-+ p->prep_then_push(std::move(p), 0);
-+ }
-+}
-+
-+int FIFO::list(int max_entries,
-+ std::optional<std::string_view> markstr,
-+ std::vector<list_entry>* presult, bool* pmore,
-+ optional_yield y)
-+{
-+ std::unique_lock l(m);
-+ auto tid = ++next_tid;
-+ std::int64_t part_num = info.tail_part_num;
-+ l.unlock();
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " entering: tid=" << tid << dendl;
-+ std::uint64_t ofs = 0;
-+ if (markstr) {
-+ auto marker = to_marker(*markstr);
-+ if (!marker) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " invalid marker string: " << markstr
-+ << " tid= "<< tid << dendl;
- return -EINVAL;
- }
- part_num = marker->num;
-@@ -1340,157 +1857,116 @@ int FIFO::trim(std::string_view markstr, bool exclusive,
optional_yield y)
- return 0;
- }
-
--struct Trimmer {
-+struct Trimmer : public Completion<Trimmer> {
- FIFO* fifo;
- std::int64_t part_num;
- std::uint64_t ofs;
- std::int64_t pn;
- bool exclusive;
-- lr::AioCompletion* super;
- std::uint64_t tid;
-- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
-- static_cast<void*>(this), &FIFO::trim_callback);
- bool update = false;
- bool canceled = false;
- int retries = 0;
-
- Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
- bool exclusive, lr::AioCompletion* super, std::uint64_t tid)
-- : fifo(fifo), part_num(part_num), ofs(ofs), pn(pn), exclusive(exclusive),
-- super(super), tid(tid) {
-- super->pc->get();
-- }
-- ~Trimmer() {
-- cur->release();
-- }
--};
-+ : Completion(super), fifo(fifo), part_num(part_num), ofs(ofs), pn(pn),
-+ exclusive(exclusive), tid(tid) {}
-
--void FIFO::trim_callback(lr::completion_t, void* arg)
--{
-- std::unique_ptr<Trimmer> trimmer(static_cast<Trimmer*>(arg));
-- auto cct = trimmer->fifo->cct;
-- auto tid = trimmer->tid;
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " entering: tid=" << tid << dendl;
-- int r = trimmer->cur->get_return_value();
-- if (r == -ENOENT) {
-- r = 0;
-- }
--
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " trim failed: r=" << r << " tid="
<< tid << dendl;
-- complete(trimmer->super, r);
-- return;
-- }
--
-- if (!trimmer->update) {
-+ void handle(Ptr&& p, int r) {
-+ auto cct = fifo->cct;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-- << " handling preceding trim callback: tid=" << tid <<
dendl;
-- trimmer->retries = 0;
-- if (trimmer->pn < trimmer->part_num) {
-- std::unique_lock l(trimmer->fifo->m);
-- const auto max_part_size = trimmer->fifo->info.params.max_part_size;
-- l.unlock();
-- trimmer->cur->release();
-- trimmer->cur = lr::Rados::aio_create_completion(arg,
&FIFO::trim_callback);
-- r = trimmer->fifo->trim_part(trimmer->pn++, max_part_size, std::nullopt,
-- false, tid, trimmer->cur);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " trim failed: r=" << r << " tid="
<< tid << dendl;
-- complete(trimmer->super, r);
-- } else {
-- trimmer.release();
-- }
-- return;
-+ << " entering: tid=" << tid << dendl;
-+ if (r == -ENOENT) {
-+ r = 0;
- }
-
-- std::unique_lock l(trimmer->fifo->m);
-- const auto tail_part_num = trimmer->fifo->info.tail_part_num;
-- l.unlock();
-- trimmer->cur->release();
-- trimmer->cur = lr::Rados::aio_create_completion(arg, &FIFO::trim_callback);
-- trimmer->update = true;
-- trimmer->canceled = tail_part_num < trimmer->part_num;
-- r = trimmer->fifo->trim_part(trimmer->part_num, trimmer->ofs,
-- std::nullopt, trimmer->exclusive, tid, trimmer->cur);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed scheduling trim: r=" << r << "
tid=" << tid << dendl;
-- complete(trimmer->super, r);
-- } else {
-- trimmer.release();
-+ << (update ? " update_meta " : " trim ") <<
"failed: r="
-+ << r << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
- }
-- return;
-- }
-
-- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " handling update-needed callback: tid=" << tid <<
dendl;
-- std::unique_lock l(trimmer->fifo->m);
-- auto tail_part_num = trimmer->fifo->info.tail_part_num;
-- auto objv = trimmer->fifo->info.version;
-- l.unlock();
-- if ((tail_part_num < trimmer->part_num) &&
-- trimmer->canceled) {
-- if (trimmer->retries > MAX_RACE_RETRIES) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " canceled too many times, giving up: tid=" << tid <<
dendl;
-- complete(trimmer->super, -EIO);
-+ if (!update) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " handling preceding trim callback: tid=" << tid
<< dendl;
-+ retries = 0;
-+ if (pn < part_num) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " pn=" << pn << " tid=" << tid
<< dendl;
-+ std::unique_lock l(fifo->m);
-+ const auto max_part_size = fifo->info.params.max_part_size;
-+ l.unlock();
-+ fifo->trim_part(pn++, max_part_size, std::nullopt,
-+ false, tid, call(std::move(p)));
-+ return;
-+ }
-+
-+ std::unique_lock l(fifo->m);
-+ const auto tail_part_num = fifo->info.tail_part_num;
-+ l.unlock();
-+ update = true;
-+ canceled = tail_part_num < part_num;
-+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
-+ call(std::move(p)));
- return;
- }
-- trimmer->cur->release();
-- trimmer->cur = lr::Rados::aio_create_completion(arg,
-- &FIFO::trim_callback);
-- ++trimmer->retries;
-- r = trimmer->fifo->_update_meta(fifo::update{}
-- .tail_part_num(trimmer->part_num),
-- objv, &trimmer->canceled,
-- tid, trimmer->cur);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed scheduling _update_meta: r="
-- << r << " tid=" << tid << dendl;
-- complete(trimmer->super, r);
-+
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " handling update-needed callback: tid=" << tid <<
dendl;
-+ std::unique_lock l(fifo->m);
-+ auto tail_part_num = fifo->info.tail_part_num;
-+ auto objv = fifo->info.version;
-+ l.unlock();
-+ if ((tail_part_num < part_num) &&
-+ canceled) {
-+ if (retries > MAX_RACE_RETRIES) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " canceled too many times, giving up: tid=" << tid
<< dendl;
-+ complete(std::move(p), -EIO);
-+ return;
-+ }
-+ ++retries;
-+ fifo->_update_meta(fifo::update{}
-+ .tail_part_num(part_num), objv, &canceled,
-+ tid, call(std::move(p)));
- } else {
-- trimmer.release();
-+ complete(std::move(p), 0);
- }
-- } else {
-- complete(trimmer->super, 0);
- }
--}
-+};
-
--int FIFO::trim(std::string_view markstr, bool exclusive, lr::AioCompletion* c) {
-+void FIFO::trim(std::string_view markstr, bool exclusive,
-+ lr::AioCompletion* c) {
- auto marker = to_marker(markstr);
-- if (!marker) {
-- return -EINVAL;
-- }
-+ auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
- std::unique_lock l(m);
- const auto max_part_size = info.params.max_part_size;
- const auto pn = info.tail_part_num;
- const auto part_oid = info.part_oid(pn);
- auto tid = ++next_tid;
- l.unlock();
-- auto trimmer = std::make_unique<Trimmer>(this, marker->num, marker->ofs,
pn, exclusive, c,
-- tid);
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " entering: tid=" << tid << dendl;
-+ auto trimmer = std::make_unique<Trimmer>(this, realmark.num, realmark.ofs,
-+ pn, exclusive, c, tid);
-+ if (!marker) {
-+ Trimmer::complete(std::move(trimmer), -EINVAL);
-+ return;
-+ }
- ++trimmer->pn;
- auto ofs = marker->ofs;
- if (pn < marker->num) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " pn=" << pn << " tid=" << tid
<< dendl;
- ofs = max_part_size;
- } else {
- trimmer->update = true;
- }
-- auto r = trim_part(pn, ofs, std::nullopt, exclusive,
-- tid, trimmer->cur);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " failed scheduling trim_part: r="
-- << r << " tid=" << tid << dendl;
-- complete(trimmer->super, r);
-- } else {
-- trimmer.release();
-- }
-- return r;
-+ trim_part(pn, ofs, std::nullopt, exclusive,
-+ tid, Trimmer::call(std::move(trimmer)));
- }
-
- int FIFO::get_part_info(int64_t part_num,
-@@ -1509,4 +1985,521 @@ int FIFO::get_part_info(int64_t part_num,
- }
- return r;
- }
-+
-+void FIFO::get_part_info(int64_t part_num,
-+ fifo::part_header* header,
-+ lr::AioCompletion* c)
-+{
-+ std::unique_lock l(m);
-+ const auto part_oid = info.part_oid(part_num);
-+ auto tid = ++next_tid;
-+ l.unlock();
-+ auto op = rgw::cls::fifo::get_part_info(cct, header, tid);
-+ auto r = ioctx.aio_operate(part_oid, c, &op, nullptr);
-+ ceph_assert(r >= 0);
-+}
-+
-+struct InfoGetter : Completion<InfoGetter> {
-+ FIFO* fifo;
-+ fifo::part_header header;
-+ fu2::function<void(int r, fifo::part_header&&)> f;
-+ std::uint64_t tid;
-+ bool headerread = false;
-+
-+ InfoGetter(FIFO* fifo, fu2::function<void(int r, fifo::part_header&&)>
f,
-+ std::uint64_t tid, lr::AioCompletion* super)
-+ : Completion(super), fifo(fifo), f(std::move(f)), tid(tid) {}
-+ void handle(Ptr&& p, int r) {
-+ if (!headerread) {
-+ if (r < 0) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " read_meta failed: r="
-+ << r << " tid=" << tid << dendl;
-+ if (f)
-+ f(r, {});
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ auto info = fifo->meta();
-+ auto hpn = info.head_part_num;
-+ if (hpn < 0) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " no head, returning empty partinfo r="
-+ << r << " tid=" << tid << dendl;
-+ if (f)
-+ f(0, {});
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ headerread = true;
-+ auto op = rgw::cls::fifo::get_part_info(fifo->cct, &header, tid);
-+ std::unique_lock l(fifo->m);
-+ auto oid = fifo->info.part_oid(hpn);
-+ l.unlock();
-+ r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op,
-+ nullptr);
-+ ceph_assert(r >= 0);
-+ return;
-+ }
-+
-+ if (r < 0) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " get_part_info failed: r="
-+ << r << " tid=" << tid << dendl;
-+ }
-+
-+ if (f)
-+ f(r, std::move(header));
-+ complete(std::move(p), r);
-+ return;
-+ }
-+};
-+
-+void FIFO::get_head_info(fu2::unique_function<void(int r,
-+ fifo::part_header&&)> f,
-+ lr::AioCompletion* c)
-+{
-+ std::unique_lock l(m);
-+ auto tid = ++next_tid;
-+ l.unlock();
-+ auto ig = std::make_unique<InfoGetter>(this, std::move(f), tid, c);
-+ read_meta(tid, InfoGetter::call(std::move(ig)));
-+}
-+
-+struct JournalProcessor : public Completion<JournalProcessor> {
-+private:
-+ FIFO* const fifo;
-+
-+ std::vector<fifo::journal_entry> processed;
-+ std::multimap<std::int64_t, fifo::journal_entry> journal;
-+ std::multimap<std::int64_t, fifo::journal_entry>::iterator iter;
-+ std::int64_t new_tail;
-+ std::int64_t new_head;
-+ std::int64_t new_max;
-+ int race_retries = 0;
-+ bool first_pp = true;
-+ bool canceled = false;
-+ std::uint64_t tid;
-+
-+ enum {
-+ entry_callback,
-+ pp_callback,
-+ } state;
-+
-+ void create_part(Ptr&& p, int64_t part_num,
-+ std::string_view tag) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ state = entry_callback;
-+ lr::ObjectWriteOperation op;
-+ op.create(false); /* We don't need exclusivity, part_init ensures
-+ we're creating from the same journal entry. */
-+ std::unique_lock l(fifo->m);
-+ part_init(&op, tag, fifo->info.params);
-+ auto oid = fifo->info.part_oid(part_num);
-+ l.unlock();
-+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
-+ ceph_assert(r >= 0);
-+ return;
-+ }
-+
-+ void remove_part(Ptr&& p, int64_t part_num,
-+ std::string_view tag) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ state = entry_callback;
-+ lr::ObjectWriteOperation op;
-+ op.remove();
-+ std::unique_lock l(fifo->m);
-+ auto oid = fifo->info.part_oid(part_num);
-+ l.unlock();
-+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
-+ ceph_assert(r >= 0);
-+ return;
-+ }
-+
-+ void finish_je(Ptr&& p, int r,
-+ const fifo::journal_entry& entry) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " finishing entry: entry=" << entry
-+ << " tid=" << tid << dendl;
-+
-+ if (entry.op == fifo::journal_entry::Op::remove && r == -ENOENT)
-+ r = 0;
-+
-+ if (r < 0) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " processing entry failed: entry=" << entry
-+ << " r=" << r << " tid=" << tid
<< dendl;
-+ complete(std::move(p), r);
-+ return;
-+ } else {
-+ switch (entry.op) {
-+ case fifo::journal_entry::Op::unknown:
-+ case fifo::journal_entry::Op::set_head:
-+ // Can't happen. Filtered out in process.
-+ complete(std::move(p), -EIO);
-+ return;
-+
-+ case fifo::journal_entry::Op::create:
-+ if (entry.part_num > new_max) {
-+ new_max = entry.part_num;
-+ }
-+ break;
-+ case fifo::journal_entry::Op::remove:
-+ if (entry.part_num >= new_tail) {
-+ new_tail = entry.part_num + 1;
-+ }
-+ break;
-+ }
-+ processed.push_back(entry);
-+ }
-+ ++iter;
-+ process(std::move(p));
-+ }
-+
-+ void postprocess(Ptr&& p) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ if (processed.empty()) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " nothing to update any more: race_retries="
-+ << race_retries << " tid=" << tid << dendl;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+ pp_run(std::move(p), 0, false);
-+ }
-+
-+public:
-+
-+ JournalProcessor(FIFO* fifo, std::uint64_t tid, lr::AioCompletion* super)
-+ : Completion(super), fifo(fifo), tid(tid) {
-+ std::unique_lock l(fifo->m);
-+ journal = fifo->info.journal;
-+ iter = journal.begin();
-+ new_tail = fifo->info.tail_part_num;
-+ new_head = fifo->info.head_part_num;
-+ new_max = fifo->info.max_push_part_num;
-+ }
-+
-+ void pp_run(Ptr&& p, int r, bool canceled) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ std::optional<int64_t> tail_part_num;
-+ std::optional<int64_t> head_part_num;
-+ std::optional<int64_t> max_part_num;
-+
-+ if (r < 0) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " failed, r=: " << r << " tid="
<< tid << dendl;
-+ complete(std::move(p), r);
-+ }
-+
-+
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " postprocessing: race_retries="
-+ << race_retries << " tid=" << tid << dendl;
-+
-+ if (!first_pp && r == 0 && !canceled) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " nothing to update any more: race_retries="
-+ << race_retries << " tid=" << tid << dendl;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+
-+ first_pp = false;
-+
-+ if (canceled) {
-+ if (race_retries >= MAX_RACE_RETRIES) {
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " canceled too many times, giving up: tid="
-+ << tid << dendl;
-+ complete(std::move(p), -ECANCELED);
-+ return;
-+ }
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " update canceled, retrying: race_retries="
-+ << race_retries << " tid=" << tid << dendl;
-+
-+ ++race_retries;
-+
-+ std::vector<fifo::journal_entry> new_processed;
-+ std::unique_lock l(fifo->m);
-+ for (auto& e : processed) {
-+ auto jiter = fifo->info.journal.find(e.part_num);
-+ /* journal entry was already processed */
-+ if (jiter == fifo->info.journal.end() ||
-+ !(jiter->second == e)) {
-+ continue;
-+ }
-+ new_processed.push_back(e);
-+ }
-+ processed = std::move(new_processed);
-+ }
-+
-+ std::unique_lock l(fifo->m);
-+ auto objv = fifo->info.version;
-+ if (new_tail > fifo->info.tail_part_num) {
-+ tail_part_num = new_tail;
-+ }
-+
-+ if (new_head > fifo->info.head_part_num) {
-+ head_part_num = new_head;
-+ }
-+
-+ if (new_max > fifo->info.max_push_part_num) {
-+ max_part_num = new_max;
-+ }
-+ l.unlock();
-+
-+ if (processed.empty() &&
-+ !tail_part_num &&
-+ !max_part_num) {
-+ /* nothing to update anymore */
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " nothing to update any more: race_retries="
-+ << race_retries << " tid=" << tid << dendl;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+ state = pp_callback;
-+ fifo->_update_meta(fifo::update{}
-+ .tail_part_num(tail_part_num)
-+ .head_part_num(head_part_num)
-+ .max_push_part_num(max_part_num)
-+ .journal_entries_rm(processed),
-+ objv, &this->canceled, tid, call(std::move(p)));
-+ return;
-+ }
-+
-+ JournalProcessor(const JournalProcessor&) = delete;
-+ JournalProcessor& operator =(const JournalProcessor&) = delete;
-+ JournalProcessor(JournalProcessor&&) = delete;
-+ JournalProcessor& operator =(JournalProcessor&&) = delete;
-+
-+ void process(Ptr&& p) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ while (iter != journal.end()) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":"
<< __LINE__
-+ << " processing entry: entry=" << *iter
-+ << " tid=" << tid << dendl;
-+ const auto entry = iter->second;
-+ switch (entry.op) {
-+ case fifo::journal_entry::Op::create:
-+ create_part(std::move(p), entry.part_num, entry.part_tag);
-+ return;
-+ case fifo::journal_entry::Op::set_head:
-+ if (entry.part_num > new_head) {
-+ new_head = entry.part_num;
-+ }
-+ processed.push_back(entry);
-+ ++iter;
-+ continue;
-+ case fifo::journal_entry::Op::remove:
-+ remove_part(std::move(p), entry.part_num, entry.part_tag);
-+ return;
-+ default:
-+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " unknown journaled op: entry=" << entry << "
tid="
-+ << tid << dendl;
-+ complete(std::move(p), -EIO);
-+ return;
-+ }
-+ }
-+ postprocess(std::move(p));
-+ return;
-+ }
-+
-+ void handle(Ptr&& p, int r) {
-+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" <<
__LINE__
-+ << " entering: tid=" << tid << dendl;
-+ switch (state) {
-+ case entry_callback:
-+ finish_je(std::move(p), r, iter->second);
-+ return;
-+ case pp_callback:
-+ auto c = canceled;
-+ canceled = false;
-+ pp_run(std::move(p), r, c);
-+ return;
-+ }
-+
-+ abort();
-+ }
-+
-+};
-+
-+void FIFO::process_journal(std::uint64_t tid, lr::AioCompletion* c) {
-+ auto p = std::make_unique<JournalProcessor>(this, tid, c);
-+ p->process(std::move(p));
-+}
-+
-+struct Lister : Completion<Lister> {
-+ FIFO* f;
-+ std::vector<list_entry> result;
-+ bool more = false;
-+ std::int64_t part_num;
-+ std::uint64_t ofs;
-+ int max_entries;
-+ int r_out = 0;
-+ std::vector<fifo::part_list_entry> entries;
-+ bool part_more = false;
-+ bool part_full = false;
-+ std::vector<list_entry>* entries_out;
-+ bool* more_out;
-+ std::uint64_t tid;
-+
-+ bool read = false;
-+
-+ void complete(Ptr&& p, int r) {
-+ if (r >= 0) {
-+ if (more_out) *more_out = more;
-+ if (entries_out) *entries_out = std::move(result);
-+ }
-+ Completion::complete(std::move(p), r);
-+ }
-+
-+public:
-+ Lister(FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries,
-+ std::vector<list_entry>* entries_out, bool* more_out,
-+ std::uint64_t tid, lr::AioCompletion* super)
-+ : Completion(super), f(f), part_num(part_num), ofs(ofs), max_entries(max_entries),
-+ entries_out(entries_out), more_out(more_out), tid(tid) {
-+ result.reserve(max_entries);
-+ }
-+
-+ Lister(const Lister&) = delete;
-+ Lister& operator =(const Lister&) = delete;
-+ Lister(Lister&&) = delete;
-+ Lister& operator =(Lister&&) = delete;
-+
-+ void handle(Ptr&& p, int r) {
-+ if (read)
-+ handle_read(std::move(p), r);
-+ else
-+ handle_list(std::move(p), r);
-+ }
-+
-+ void list(Ptr&& p) {
-+ if (max_entries > 0) {
-+ part_more = false;
-+ part_full = false;
-+ entries.clear();
-+
-+ std::unique_lock l(f->m);
-+ auto part_oid = f->info.part_oid(part_num);
-+ l.unlock();
-+
-+ read = false;
-+ auto op = list_part(f->cct, {}, ofs, max_entries, &r_out,
-+ &entries, &part_more, &part_full,
-+ nullptr, tid);
-+ f->ioctx.aio_operate(part_oid, call(std::move(p)), &op, nullptr);
-+ } else {
-+ complete(std::move(p), 0);
-+ }
-+ }
-+
-+ void handle_read(Ptr&& p, int r) {
-+ read = false;
-+ if (r >= 0) r = r_out;
-+ r_out = 0;
-+
-+ if (r < 0) {
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ if (part_num < f->info.tail_part_num) {
-+ /* raced with trim? restart */
-+ max_entries += result.size();
-+ result.clear();
-+ part_num = f->info.tail_part_num;
-+ ofs = 0;
-+ list(std::move(p));
-+ return;
-+ }
-+ /* assuming part was not written yet, so end of data */
-+ more = false;
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+
-+ void handle_list(Ptr&& p, int r) {
-+ if (r >= 0) r = r_out;
-+ r_out = 0;
-+ std::unique_lock l(f->m);
-+ auto part_oid = f->info.part_oid(part_num);
-+ l.unlock();
-+ if (r == -ENOENT) {
-+ read = true;
-+ f->read_meta(tid, call(std::move(p)));
-+ return;
-+ }
-+ if (r < 0) {
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ more = part_full || part_more;
-+ for (auto& entry : entries) {
-+ list_entry e;
-+ e.data = std::move(entry.data);
-+ e.marker = marker{part_num, entry.ofs}.to_string();
-+ e.mtime = entry.mtime;
-+ result.push_back(std::move(e));
-+ }
-+ max_entries -= entries.size();
-+ entries.clear();
-+ if (max_entries > 0 && part_more) {
-+ list(std::move(p));
-+ return;
-+ }
-+
-+ if (!part_full) { /* head part is not full */
-+ complete(std::move(p), 0);
-+ return;
-+ }
-+ ++part_num;
-+ ofs = 0;
-+ list(std::move(p));
-+ }
-+};
-+
-+void FIFO::list(int max_entries,
-+ std::optional<std::string_view> markstr,
-+ std::vector<list_entry>* out,
-+ bool* more,
-+ lr::AioCompletion* c) {
-+ std::unique_lock l(m);
-+ auto tid = ++next_tid;
-+ std::int64_t part_num = info.tail_part_num;
-+ l.unlock();
-+ std::uint64_t ofs = 0;
-+ std::optional<::rgw::cls::fifo::marker> marker;
-+
-+ if (markstr) {
-+ marker = to_marker(*markstr);
-+ if (marker) {
-+ part_num = marker->num;
-+ ofs = marker->ofs;
-+ }
-+ }
-+
-+ auto ls = std::make_unique<Lister>(this, part_num, ofs, max_entries, out,
-+ more, tid, c);
-+ if (markstr && !marker) {
-+ auto l = ls.get();
-+ l->complete(std::move(ls), -EINVAL);
-+ } else {
-+ ls->list(std::move(ls));
-+ }
-+}
- }
-diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
-index 1f8d3f3fc95d8..b6b5f04bb30ad 100644
---- a/src/rgw/cls_fifo_legacy.h
-+++ b/src/rgw/cls_fifo_legacy.h
-@@ -31,6 +31,7 @@
-
- #include "include/rados/librados.hpp"
- #include "include/buffer.h"
-+#include "include/function2.hpp"
-
- #include "common/async/yield_context.h"
-
-@@ -57,24 +58,6 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
- std::uint32_t* part_entry_overhead,
- std::uint64_t tid, optional_yield y,
- bool probe = false);
--void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
-- const fifo::update& update);
--void part_init(lr::ObjectWriteOperation* op, std::string_view tag,
-- fifo::data_params params);
--int push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
-- std::deque<cb::list> data_bufs, std::uint64_t tid, optional_yield y);
--void trim_part(lr::ObjectWriteOperation* op,
-- std::optional<std::string_view> tag, std::uint64_t ofs,
-- bool exclusive);
--int list_part(lr::IoCtx& ioctx, const std::string& oid,
-- std::optional<std::string_view> tag, std::uint64_t ofs,
-- std::uint64_t max_entries,
-- std::vector<fifo::part_list_entry>* entries,
-- bool* more, bool* full_part, std::string* ptag,
-- std::uint64_t tid, optional_yield y);
--int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
-- fifo::part_header* header, std::uint64_t,
-- optional_yield y);
-
- struct marker {
- std::int64_t num = 0;
-@@ -117,6 +100,12 @@ class FIFO {
- friend struct Reader;
- friend struct Updater;
- friend struct Trimmer;
-+ friend struct InfoGetter;
-+ friend struct Pusher;
-+ friend struct NewPartPreparer;
-+ friend struct NewHeadPreparer;
-+ friend struct JournalProcessor;
-+ friend struct Lister;
-
- mutable lr::IoCtx ioctx;
- CephContext* cct = static_cast<CephContext*>(ioctx.cct());
-@@ -144,32 +133,34 @@ class FIFO {
- int _update_meta(const fifo::update& update,
- fifo::objv version, bool* pcanceled,
- std::uint64_t tid, optional_yield y);
-- int _update_meta(const fifo::update& update,
-- fifo::objv version, bool* pcanceled,
-- std::uint64_t tid, lr::AioCompletion* c);
-+ void _update_meta(const fifo::update& update,
-+ fifo::objv version, bool* pcanceled,
-+ std::uint64_t tid, lr::AioCompletion* c);
- int create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
- optional_yield y);
- int remove_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
- optional_yield y);
- int process_journal(std::uint64_t tid, optional_yield y);
-+ void process_journal(std::uint64_t tid, lr::AioCompletion* c);
- int _prepare_new_part(bool is_head, std::uint64_t tid, optional_yield y);
-+ void _prepare_new_part(bool is_head, std::uint64_t tid, lr::AioCompletion* c);
- int _prepare_new_head(std::uint64_t tid, optional_yield y);
-+ void _prepare_new_head(std::uint64_t tid, lr::AioCompletion* c);
- int push_entries(const std::deque<cb::list>& data_bufs,
- std::uint64_t tid, optional_yield y);
-+ void push_entries(const std::deque<cb::list>& data_bufs,
-+ std::uint64_t tid, lr::AioCompletion* c);
- int trim_part(int64_t part_num, uint64_t ofs,
- std::optional<std::string_view> tag, bool exclusive,
- std::uint64_t tid, optional_yield y);
-- int trim_part(int64_t part_num, uint64_t ofs,
-- std::optional<std::string_view> tag, bool exclusive,
-- std::uint64_t tid, lr::AioCompletion* c);
-+ void trim_part(int64_t part_num, uint64_t ofs,
-+ std::optional<std::string_view> tag, bool exclusive,
-+ std::uint64_t tid, lr::AioCompletion* c);
-
-- static void trim_callback(lr::completion_t, void* arg);
-- static void update_callback(lr::completion_t, void* arg);
-- static void read_callback(lr::completion_t, void* arg);
- /// Force refresh of metadata, yielding/blocking style
- int read_meta(std::uint64_t tid, optional_yield y);
- /// Force refresh of metadata, with a librados Completion
-- int read_meta(std::uint64_t tid, lr::AioCompletion* c);
-+ void read_meta(std::uint64_t tid, lr::AioCompletion* c);
-
- public:
-
-@@ -215,12 +206,20 @@ class FIFO {
- int push(const cb::list& bl, //< Entry to push
- optional_yield y //< Optional yield
- );
-- /// Push entres to the FIFO
-+ /// Push an entry to the FIFO
-+ void push(const cb::list& bl, //< Entry to push
-+ lr::AioCompletion* c //< Async Completion
-+ );
-+ /// Push entries to the FIFO
- int push(const std::vector<cb::list>& data_bufs, //< Entries to push
-- /// Optional yield
-- optional_yield y);
-+ optional_yield y //< Optional yield
-+ );
-+ /// Push entries to the FIFO
-+ void push(const std::vector<cb::list>& data_bufs, //< Entries to push
-+ lr::AioCompletion* c //< Async Completion
-+ );
- /// List entries
-- int list(int max_entries, /// Maximum entries to list
-+ int list(int max_entries, //< Maximum entries to list
- /// Point after which to begin listing. Start at tail if null
- std::optional<std::string_view> markstr,
- std::vector<list_entry>* out, //< OUT: entries
-@@ -228,6 +227,14 @@ class FIFO {
- bool* more,
- optional_yield y //< Optional yield
- );
-+ void list(int max_entries, //< Maximum entries to list
-+ /// Point after which to begin listing. Start at tail if null
-+ std::optional<std::string_view> markstr,
-+ std::vector<list_entry>* out, //< OUT: entries
-+ /// OUT: True if more entries in FIFO beyond the last returned
-+ bool* more,
-+ lr::AioCompletion* c //< Async Completion
-+ );
- /// Trim entries, coroutine/block style
- int trim(std::string_view markstr, //< Position to which to trim, inclusive
- bool exclusive, //< If true, do not trim the target entry
-@@ -235,16 +242,28 @@ class FIFO {
- optional_yield y //< Optional yield
- );
- /// Trim entries, librados AioCompletion style
-- int trim(std::string_view markstr, //< Position to which to trim, inclusive
-- bool exclusive, //< If true, do not trim the target entry
-- //< itself, just all those before it.
-- lr::AioCompletion* c //< librados AIO Completion
-+ void trim(std::string_view markstr, //< Position to which to trim, inclusive
-+ bool exclusive, //< If true, do not trim the target entry
-+ //< itself, just all those before it.
-+ lr::AioCompletion* c //< librados AIO Completion
- );
- /// Get part info
- int get_part_info(int64_t part_num, /// Part number
- fifo::part_header* header, //< OUT: Information
- optional_yield y //< Optional yield
- );
-+ /// Get part info
-+ void get_part_info(int64_t part_num, //< Part number
-+ fifo::part_header* header, //< OUT: Information
-+ lr::AioCompletion* c //< AIO Completion
-+ );
-+ /// A convenience method to fetch the part information for the FIFO
-+ /// head, using librados::AioCompletion, since
-+ /// libradio::AioCompletions compose lousily.
-+ void get_head_info(fu2::unique_function< //< Function to receive info
-+ void(int r, fifo::part_header&&)>,
-+ lr::AioCompletion* c //< AIO Completion
-+ );
- };
- }
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index a875d075ecade..8142b26e01a8b 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -469,12 +469,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- pc->cond.notify_all();
- pc->put_unlock();
- } else {
-- r = fifos[index]->trim(marker, false, c);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": unable to trim FIFO: " << get_oid(index)
-- << ": " << cpp_strerror(-r) << dendl;
-- }
-+ fifos[index]->trim(marker, false, c);
- }
- return r;
- }
-diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
-index dae4980f8dca4..69cee5a887405 100644
---- a/src/test/rgw/test_cls_fifo_legacy.cc
-+++ b/src/test/rgw/test_cls_fifo_legacy.cc
-@@ -69,6 +69,8 @@ class LegacyFIFO : public testing::Test {
- };
-
- using LegacyClsFIFO = LegacyFIFO;
-+using AioLegacyFIFO = LegacyFIFO;
-+
-
- TEST_F(LegacyClsFIFO, TestCreate)
- {
-@@ -577,8 +579,7 @@ TEST_F(LegacyFIFO, TestAioTrim)
- marker = result.front().marker;
- std::unique_ptr<R::AioCompletion> c(rados.aio_create_completion(nullptr,
- nullptr));
-- r = f->trim(*marker, false, c.get());
-- ASSERT_EQ(0, r);
-+ f->trim(*marker, false, c.get());
- c->wait_for_complete();
- r = c->get_return_value();
- ASSERT_EQ(0, r);
-@@ -645,3 +646,482 @@ TEST_F(LegacyFIFO, TestTrimExclusive) {
- ASSERT_EQ(result.size(), 1);
- ASSERT_EQ(max_entries - 1, val);
- }
-+
-+TEST_F(AioLegacyFIFO, TestPushListTrim)
-+{
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
-+ ASSERT_EQ(0, r);
-+ static constexpr auto max_entries = 10u;
-+ for (uint32_t i = 0; i < max_entries; ++i) {
-+ cb::list bl;
-+ encode(i, bl);
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bl, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ }
-+
-+ std::optional<std::string> marker;
-+ /* get entries one by one */
-+ std::vector<RCf::list_entry> result;
-+ bool more = false;
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ auto c = R::Rados::aio_create_completion();
-+ f->list(1, marker, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+
-+ bool expected_more = (i != (max_entries - 1));
-+ ASSERT_EQ(expected_more, more);
-+ ASSERT_EQ(1, result.size());
-+
-+ std::uint32_t val;
-+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
-+
-+ ASSERT_EQ(i, val);
-+ result.clear();
-+ }
-+
-+ /* get all entries at once */
-+ std::string markers[max_entries];
-+ std::uint32_t min_entry = 0;
-+ auto c = R::Rados::aio_create_completion();
-+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+
-+ ASSERT_FALSE(more);
-+ ASSERT_EQ(max_entries, result.size());
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ std::uint32_t val;
-+ std::tie(val, markers[i]) = decode_entry<std::uint32_t>(result[i]);
-+ ASSERT_EQ(i, val);
-+ }
-+
-+ /* trim one entry */
-+ c = R::Rados::aio_create_completion();
-+ f->trim(markers[min_entry], false, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ++min_entry;
-+
-+ c = R::Rados::aio_create_completion();
-+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_FALSE(more);
-+ ASSERT_EQ(max_entries - min_entry, result.size());
-+
-+ for (auto i = min_entry; i < max_entries; ++i) {
-+ std::uint32_t val;
-+ std::tie(val, markers[i - min_entry]) =
-+ decode_entry<std::uint32_t>(result[i - min_entry]);
-+ EXPECT_EQ(i, val);
-+ }
-+}
-+
-+
-+TEST_F(AioLegacyFIFO, TestPushTooBig)
-+{
-+ static constexpr auto max_part_size = 2048ull;
-+ static constexpr auto max_entry_size = 128ull;
-+
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
-+ std::nullopt, false, max_part_size, max_entry_size);
-+ ASSERT_EQ(0, r);
-+
-+ char buf[max_entry_size + 1];
-+ memset(buf, 0, sizeof(buf));
-+
-+ cb::list bl;
-+ bl.append(buf, sizeof(buf));
-+
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bl, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ ASSERT_EQ(-E2BIG, r);
-+ c->release();
-+
-+ c = R::Rados::aio_create_completion();
-+ f->push(std::vector<cb::list>{}, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+}
-+
-+
-+TEST_F(AioLegacyFIFO, TestMultipleParts)
-+{
-+ static constexpr auto max_part_size = 2048ull;
-+ static constexpr auto max_entry_size = 128ull;
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
-+ std::nullopt, false, max_part_size,
-+ max_entry_size);
-+ ASSERT_EQ(0, r);
-+
-+ {
-+ auto c = R::Rados::aio_create_completion();
-+ f->get_head_info([&](int r, RCf::part_info&& p) {
-+ ASSERT_TRUE(p.tag.empty());
-+ ASSERT_EQ(0, p.magic);
-+ ASSERT_EQ(0, p.min_ofs);
-+ ASSERT_EQ(0, p.last_ofs);
-+ ASSERT_EQ(0, p.next_ofs);
-+ ASSERT_EQ(0, p.min_index);
-+ ASSERT_EQ(0, p.max_index);
-+ ASSERT_EQ(ceph::real_time{}, p.max_time);
-+ }, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ }
-+
-+ char buf[max_entry_size];
-+ memset(buf, 0, sizeof(buf));
-+ const auto [part_header_size, part_entry_overhead] =
-+ f->get_part_layout_info();
-+ const auto entries_per_part = ((max_part_size - part_header_size) /
-+ (max_entry_size + part_entry_overhead));
-+ const auto max_entries = entries_per_part * 4 + 1;
-+ /* push enough entries */
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ cb::list bl;
-+ *(int *)buf = i;
-+ bl.append(buf, sizeof(buf));
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bl, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ }
-+
-+ auto info = f->meta();
-+ ASSERT_EQ(info.id, fifo_id);
-+ /* head should have advanced */
-+ ASSERT_GT(info.head_part_num, 0);
-+
-+ /* list all at once */
-+ std::vector<RCf::list_entry> result;
-+ bool more = false;
-+ auto c = R::Rados::aio_create_completion();
-+ f->list(max_entries, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ EXPECT_EQ(false, more);
-+ ASSERT_EQ(max_entries, result.size());
-+
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ auto& bl = result[i].data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ }
-+
-+ std::optional<std::string> marker;
-+ /* get entries one by one */
-+
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ c = R::Rados::aio_create_completion();
-+ f->list(1, marker, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ ASSERT_EQ(result.size(), 1);
-+ const bool expected_more = (i != (max_entries - 1));
-+ ASSERT_EQ(expected_more, more);
-+
-+ std::uint32_t val;
-+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
-+
-+ auto& entry = result.front();
-+ auto& bl = entry.data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ marker = entry.marker;
-+ }
-+
-+ /* trim one at a time */
-+ marker.reset();
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ /* read single entry */
-+ c = R::Rados::aio_create_completion();
-+ f->list(1, marker, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ ASSERT_EQ(result.size(), 1);
-+ const bool expected_more = (i != (max_entries - 1));
-+ ASSERT_EQ(expected_more, more);
-+
-+ marker = result.front().marker;
-+ c = R::Rados::aio_create_completion();
-+ f->trim(*marker, false, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ ASSERT_EQ(result.size(), 1);
-+
-+ /* check tail */
-+ info = f->meta();
-+ ASSERT_EQ(info.tail_part_num, i / entries_per_part);
-+
-+ /* try to read all again, see how many entries left */
-+ c = R::Rados::aio_create_completion();
-+ f->list(max_entries, marker, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ EXPECT_EQ(0, r);
-+ ASSERT_EQ(max_entries - i - 1, result.size());
-+ ASSERT_EQ(false, more);
-+ }
-+
-+ /* tail now should point at head */
-+ info = f->meta();
-+ ASSERT_EQ(info.head_part_num, info.tail_part_num);
-+
-+ /* check old tails are removed */
-+ for (auto i = 0; i < info.tail_part_num; ++i) {
-+ c = R::Rados::aio_create_completion();
-+ RCf::part_info partinfo;
-+ f->get_part_info(i, &partinfo, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(-ENOENT, r);
-+ }
-+ /* check current tail exists */
-+ std::uint64_t next_ofs;
-+ {
-+ c = R::Rados::aio_create_completion();
-+ RCf::part_info partinfo;
-+ f->get_part_info(info.tail_part_num, &partinfo, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ next_ofs = partinfo.next_ofs;
-+ }
-+ ASSERT_EQ(0, r);
-+
-+ c = R::Rados::aio_create_completion();
-+ f->get_head_info([&](int r, RCf::part_info&& p) {
-+ ASSERT_EQ(next_ofs, p.next_ofs);
-+ }, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+}
-+
-+TEST_F(AioLegacyFIFO, TestTwoPushers)
-+{
-+ static constexpr auto max_part_size = 2048ull;
-+ static constexpr auto max_entry_size = 128ull;
-+
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
-+ std::nullopt, false, max_part_size,
-+ max_entry_size);
-+ ASSERT_EQ(0, r);
-+ char buf[max_entry_size];
-+ memset(buf, 0, sizeof(buf));
-+
-+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
-+ const auto entries_per_part = ((max_part_size - part_header_size) /
-+ (max_entry_size + part_entry_overhead));
-+ const auto max_entries = entries_per_part * 4 + 1;
-+ std::unique_ptr<RCf::FIFO> f2;
-+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
-+ std::vector fifos{&f, &f2};
-+
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ cb::list bl;
-+ *(int *)buf = i;
-+ bl.append(buf, sizeof(buf));
-+ auto& f = *fifos[i % fifos.size()];
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bl, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ }
-+
-+ /* list all by both */
-+ std::vector<RCf::list_entry> result;
-+ bool more = false;
-+ auto c = R::Rados::aio_create_completion();
-+ f2->list(max_entries, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_EQ(false, more);
-+ ASSERT_EQ(max_entries, result.size());
-+
-+ c = R::Rados::aio_create_completion();
-+ f2->list(max_entries, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_EQ(false, more);
-+ ASSERT_EQ(max_entries, result.size());
-+
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ auto& bl = result[i].data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ }
-+}
-+
-+TEST_F(AioLegacyFIFO, TestTwoPushersTrim)
-+{
-+ static constexpr auto max_part_size = 2048ull;
-+ static constexpr auto max_entry_size = 128ull;
-+ std::unique_ptr<RCf::FIFO> f1;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f1, null_yield, std::nullopt,
-+ std::nullopt, false, max_part_size,
-+ max_entry_size);
-+ ASSERT_EQ(0, r);
-+
-+ char buf[max_entry_size];
-+ memset(buf, 0, sizeof(buf));
-+
-+ auto [part_header_size, part_entry_overhead] = f1->get_part_layout_info();
-+ const auto entries_per_part = ((max_part_size - part_header_size) /
-+ (max_entry_size + part_entry_overhead));
-+ const auto max_entries = entries_per_part * 4 + 1;
-+
-+ std::unique_ptr<RCf::FIFO> f2;
-+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
-+ ASSERT_EQ(0, r);
-+
-+ /* push one entry to f2 and the rest to f1 */
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ cb::list bl;
-+ *(int *)buf = i;
-+ bl.append(buf, sizeof(buf));
-+ auto& f = (i < 1 ? f2 : f1);
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bl, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ }
-+
-+ /* trim half by fifo1 */
-+ auto num = max_entries / 2;
-+ std::string marker;
-+ std::vector<RCf::list_entry> result;
-+ bool more = false;
-+ auto c = R::Rados::aio_create_completion();
-+ f1->list(num, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_EQ(true, more);
-+ ASSERT_EQ(num, result.size());
-+
-+ for (auto i = 0u; i < num; ++i) {
-+ auto& bl = result[i].data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ }
-+
-+ auto& entry = result[num - 1];
-+ marker = entry.marker;
-+ c = R::Rados::aio_create_completion();
-+ f1->trim(marker, false, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ /* list what's left by fifo2 */
-+
-+ const auto left = max_entries - num;
-+ c = R::Rados::aio_create_completion();
-+ f2->list(left, marker, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_EQ(left, result.size());
-+ ASSERT_EQ(false, more);
-+
-+ for (auto i = num; i < max_entries; ++i) {
-+ auto& bl = result[i - num].data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ }
-+}
-+
-+TEST_F(AioLegacyFIFO, TestPushBatch)
-+{
-+ static constexpr auto max_part_size = 2048ull;
-+ static constexpr auto max_entry_size = 128ull;
-+
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
-+ std::nullopt, false, max_part_size,
-+ max_entry_size);
-+ ASSERT_EQ(0, r);
-+
-+ char buf[max_entry_size];
-+ memset(buf, 0, sizeof(buf));
-+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
-+ auto entries_per_part = ((max_part_size - part_header_size) /
-+ (max_entry_size + part_entry_overhead));
-+ auto max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
-+ std::vector<cb::list> bufs;
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ cb::list bl;
-+ *(int *)buf = i;
-+ bl.append(buf, sizeof(buf));
-+ bufs.push_back(bl);
-+ }
-+ ASSERT_EQ(max_entries, bufs.size());
-+
-+ auto c = R::Rados::aio_create_completion();
-+ f->push(bufs, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+
-+ /* list all */
-+
-+ std::vector<RCf::list_entry> result;
-+ bool more = false;
-+ c = R::Rados::aio_create_completion();
-+ f->list(max_entries, std::nullopt, &result, &more, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(0, r);
-+ ASSERT_EQ(false, more);
-+ ASSERT_EQ(max_entries, result.size());
-+ for (auto i = 0u; i < max_entries; ++i) {
-+ auto& bl = result[i].data;
-+ ASSERT_EQ(i, *(int *)bl.c_str());
-+ }
-+ auto& info = f->meta();
-+ ASSERT_EQ(info.head_part_num, 4);
-+}
-
-From aede44ac6667c9a1ec7e813b547f8765754d896f Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 01:44:36 -0500
-Subject: [PATCH 03/26] rgw: Factor out tool to deal with different log backing
-
-Read through the shards of a log and find out what kind it is.
-
-Also remove a log.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit ed15d03f068c6f6e959f04d9d8f99eac82ebbd29)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/cls/log/cls_log_types.h | 3 +
- src/rgw/CMakeLists.txt | 1 +
- src/rgw/rgw_log_backing.cc | 215 +++++++++++++++++++++++++++++++
- src/rgw/rgw_log_backing.h | 70 ++++++++++
- src/test/rgw/CMakeLists.txt | 5 +
- src/test/rgw/test_log_backing.cc | 176 +++++++++++++++++++++++++
- 6 files changed, 470 insertions(+)
- create mode 100644 src/rgw/rgw_log_backing.cc
- create mode 100644 src/rgw/rgw_log_backing.h
- create mode 100644 src/test/rgw/test_log_backing.cc
-
-diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h
-index c5c00766d8156..1746d243e5a14 100644
---- a/src/cls/log/cls_log_types.h
-+++ b/src/cls/log/cls_log_types.h
-@@ -65,6 +65,9 @@ inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) {
- return (lhs.max_marker == rhs.max_marker &&
- lhs.max_time == rhs.max_time);
- }
-+inline bool operator !=(const cls_log_header& lhs, const cls_log_header& rhs) {
-+ return !(lhs == rhs);
-+}
- WRITE_CLASS_ENCODER(cls_log_header)
-
-
-diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
-index 44de25895ea2d..d3d91d4957947 100644
---- a/src/rgw/CMakeLists.txt
-+++ b/src/rgw/CMakeLists.txt
-@@ -141,6 +141,7 @@ set(librgw_common_srcs
- rgw_tag.cc
- rgw_tag_s3.cc
- rgw_tools.cc
-+ rgw_log_backing.cc
- rgw_user.cc
- rgw_website.cc
- rgw_xml.cc
-diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
-new file mode 100644
-index 0000000000000..63edf972a0307
---- /dev/null
-+++ b/src/rgw/rgw_log_backing.cc
-@@ -0,0 +1,215 @@
-+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-+// vim: ts=8 sw=2 smarttab ft=cpp
-+
-+#include "cls/log/cls_log_client.h"
-+
-+#include "rgw_log_backing.h"
-+#include "rgw_tools.h"
-+#include "cls_fifo_legacy.h"
-+
-+static constexpr auto dout_subsys = ceph_subsys_rgw;
-+
-+enum class shard_check { dne, omap, fifo, corrupt };
-+inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
-+ switch (t) {
-+ case shard_check::dne:
-+ return m << "shard_check::dne";
-+ case shard_check::omap:
-+ return m << "shard_check::omap";
-+ case shard_check::fifo:
-+ return m << "shard_check::fifo";
-+ case shard_check::corrupt:
-+ return m << "shard_check::corrupt";
-+ }
-+
-+ return m << "shard_check::UNKNOWN=" << static_cast<uint32_t>(t);
-+}
-+
-+namespace {
-+/// Return the shard type, and a bool to see whether it has entries.
-+std::pair<shard_check, bool>
-+probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
-+{
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ bool omap = false;
-+ {
-+ librados::ObjectReadOperation op;
-+ cls_log_header header;
-+ cls_log_info(op, &header);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
-+ if (r == -ENOENT) {
-+ return { shard_check::dne, {} };
-+ }
-+
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " error probing for omap: r=" << r
-+ << ", oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ if (header != cls_log_header{})
-+ omap = true;
-+ }
-+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
-+ &fifo, y,
-+ std::nullopt, true);
-+ if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " error probing for fifo: r=" << r
-+ << ", oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ if (fifo && omap) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " fifo and omap found: oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ if (fifo) {
-+ bool more = false;
-+ std::vector<rgw::cls::fifo::list_entry> entries;
-+ r = fifo->list(1, nullopt, &entries, &more, y);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": unable to list entries: r=" << r
-+ << ", oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ return { shard_check::fifo, !entries.empty() };
-+ }
-+ if (omap) {
-+ std::list<cls_log_entry> entries;
-+ std::string out_marker;
-+ bool truncated = false;
-+ librados::ObjectReadOperation op;
-+ cls_log_list(op, {}, {}, {}, 1, entries,
-+ &out_marker, &truncated);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed to list: r=" << r << ", oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ return { shard_check::omap, !entries.empty() };
-+ }
-+
-+ // An object exists, but has never had FIFO or cls_log entries written
-+ // to it. Likely just the marker Omap.
-+ return { shard_check::dne, {} };
-+}
-+
-+tl::expected<log_type, bs::error_code>
-+handle_dne(librados::IoCtx& ioctx,
-+ log_type def,
-+ std::string oid,
-+ optional_yield y)
-+{
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ if (def == log_type::fifo) {
-+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
-+ &fifo, y,
-+ std::nullopt);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " error creating FIFO: r=" << r
-+ << ", oid=" << oid << dendl;
-+ return tl::unexpected(bs::error_code(-r, bs::system_category()));
-+ }
-+ }
-+ return def;
-+}
-+}
-+
-+tl::expected<log_type, bs::error_code>
-+log_backing_type(librados::IoCtx& ioctx,
-+ log_type def,
-+ int shards,
-+ const fu2::unique_function<std::string(int) const>& get_oid,
-+ optional_yield y)
-+{
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ auto check = shard_check::dne;
-+ for (int i = 0; i < shards; ++i) {
-+ auto [c, e] = probe_shard(ioctx, get_oid(i), y);
-+ if (c == shard_check::corrupt)
-+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
-+ if (c == shard_check::dne) continue;
-+ if (check == shard_check::dne) {
-+ check = c;
-+ continue;
-+ }
-+
-+ if (check != c) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " clashing types: check=" << check
-+ << ", c=" << c << dendl;
-+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
-+ }
-+ }
-+ if (check == shard_check::corrupt) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " should be unreachable!" << dendl;
-+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
-+ }
-+
-+ if (check == shard_check::dne)
-+ return handle_dne(ioctx,
-+ def,
-+ get_oid(0),
-+ y);
-+
-+ return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
-+}
-+
-+bs::error_code log_remove(librados::IoCtx& ioctx,
-+ int shards,
-+ const fu2::unique_function<std::string(int) const>& get_oid,
-+ optional_yield y)
-+{
-+ bs::error_code ec;
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ for (int i = 0; i < shards; ++i) {
-+ auto oid = get_oid(i);
-+ rados::cls::fifo::info info;
-+ uint32_t part_header_size = 0, part_entry_overhead = 0;
-+
-+ auto r = rgw::cls::fifo::get_meta(ioctx, oid, nullopt, &info,
-+ &part_header_size, &part_entry_overhead,
-+ 0, y, true);
-+ if (r == -ENOENT) continue;
-+ if (r == 0 && info.head_part_num > -1) {
-+ for (auto j = info.tail_part_num; j <= info.head_part_num; ++j) {
-+ librados::ObjectWriteOperation op;
-+ op.remove();
-+ auto part_oid = info.part_oid(j);
-+ auto subr = rgw_rados_operate(ioctx, part_oid, &op, null_yield);
-+ if (subr < 0 && subr != -ENOENT) {
-+ if (!ec)
-+ ec = bs::error_code(-subr, bs::system_category());
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed removing FIFO part: part_oid=" << part_oid
-+ << ", subr=" << subr << dendl;
-+ }
-+ }
-+ }
-+ if (r < 0 && r != -ENODATA) {
-+ if (!ec)
-+ ec = bs::error_code(-r, bs::system_category());
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed checking FIFO part: oid=" << oid
-+ << ", r=" << r << dendl;
-+ }
-+ librados::ObjectWriteOperation op;
-+ op.remove();
-+ r = rgw_rados_operate(ioctx, oid, &op, null_yield);
-+ if (r < 0 && r != -ENOENT) {
-+ if (!ec)
-+ ec = bs::error_code(-r, bs::system_category());
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed removing shard: oid=" << oid
-+ << ", r=" << r << dendl;
-+ }
-+ }
-+ return ec;
-+}
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-new file mode 100644
-index 0000000000000..d769af48b01fe
---- /dev/null
-+++ b/src/rgw/rgw_log_backing.h
-@@ -0,0 +1,70 @@
-+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-+// vim: ts=8 sw=2 smarttab ft=cpp
-+
-+#ifndef CEPH_RGW_LOGBACKING_H
-+#define CEPH_RGW_LOGBACKING_H
-+
-+#include <optional>
-+#include <iostream>
-+#include <string>
-+#include <string_view>
-+
-+#include <strings.h>
-+
-+#include <boost/system/error_code.hpp>
-+
-+#include "include/rados/librados.hpp"
-+#include "include/expected.hpp"
-+#include "include/function2.hpp"
-+
-+#include "common/async/yield_context.h"
-+
-+namespace bs = boost::system;
-+
-+/// Type of log backing, stored in the mark used in the quick check,
-+/// and passed to checking functions.
-+enum class log_type {
-+ omap = 0,
-+ fifo = 1
-+};
-+
-+inline std::optional<log_type> to_log_type(std::string_view s) {
-+ if (strncasecmp(s.data(), "omap", s.length()) == 0) {
-+ return log_type::omap;
-+ } else if (strncasecmp(s.data(), "fifo", s.length()) == 0) {
-+ return log_type::fifo;
-+ } else {
-+ return std::nullopt;
-+ }
-+}
-+inline std::ostream& operator <<(std::ostream& m, const log_type& t) {
-+ switch (t) {
-+ case log_type::omap:
-+ return m << "log_type::omap";
-+ case log_type::fifo:
-+ return m << "log_type::fifo";
-+ }
-+
-+ return m << "log_type::UNKNOWN=" << static_cast<uint32_t>(t);
-+}
-+
-+/// Look over the shards in a log and determine the type.
-+tl::expected<log_type, bs::error_code>
-+log_backing_type(librados::IoCtx& ioctx,
-+ log_type def,
-+ int shards, //< Total number of shards
-+ /// A function taking a shard number and
-+ /// returning an oid.
-+ const fu2::unique_function<std::string(int) const>& get_oid,
-+ optional_yield y);
-+
-+/// Remove all log shards and associated parts of fifos.
-+bs::error_code log_remove(librados::IoCtx& ioctx,
-+ int shards, //< Total number of shards
-+ /// A function taking a shard number and
-+ /// returning an oid.
-+ const fu2::unique_function<std::string(int) const>& get_oid,
-+ optional_yield y);
-+
-+
-+#endif
-diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt
-index 7817a42ef9ab8..c4aa22db81749 100644
---- a/src/test/rgw/CMakeLists.txt
-+++ b/src/test/rgw/CMakeLists.txt
-@@ -213,6 +213,11 @@ add_executable(unittest_cls_fifo_legacy test_cls_fifo_legacy.cc)
- target_link_libraries(unittest_cls_fifo_legacy radostest-cxx ${UNITTEST_LIBS}
- ${rgw_libs})
-
-+# unittest_log_backing
-+add_executable(unittest_log_backing test_log_backing.cc)
-+target_link_libraries(unittest_log_backing radostest-cxx ${UNITTEST_LIBS}
-+ ${rgw_libs})
-+
- add_executable(unittest_rgw_lua test_rgw_lua.cc)
- add_ceph_unittest(unittest_rgw_lua)
- target_link_libraries(unittest_rgw_lua ${rgw_libs} ${LUA_LIBRARIES})
-diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
-new file mode 100644
-index 0000000000000..5180d5fc74fe8
---- /dev/null
-+++ b/src/test/rgw/test_log_backing.cc
-@@ -0,0 +1,176 @@
-+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-+// vim: ts=8 sw=2 smarttab
-+/*
-+ * Ceph - scalable distributed file system
-+ *
-+ * Copyright (C) 2019 Red Hat, Inc.
-+ *
-+ * This is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License version 2.1, as published by the Free Software
-+ * Foundation. See file COPYING.
-+ *
-+ */
-+
-+#include "rgw_log_backing.h"
-+
-+#include <cerrno>
-+#include <iostream>
-+#include <string_view>
-+
-+#undef FMT_HEADER_ONLY
-+#define FMT_HEADER_ONLY 1
-+#include <fmt/format.h>
-+
-+#include "include/types.h"
-+#include "include/rados/librados.hpp"
-+
-+#include "test/librados/test_cxx.h"
-+#include "global/global_context.h"
-+
-+#include "cls/log/cls_log_client.h"
-+
-+#include "rgw/rgw_tools.h"
-+#include "rgw/cls_fifo_legacy.h"
-+
-+#include "gtest/gtest.h"
-+
-+namespace lr = librados;
-+namespace cb = ceph::buffer;
-+namespace fifo = rados::cls::fifo;
-+namespace RCf = rgw::cls::fifo;
-+
-+class LogBacking : public testing::Test {
-+protected:
-+ static constexpr int SHARDS = 3;
-+ const std::string pool_name = get_temp_pool_name();
-+ lr::Rados rados;
-+ lr::IoCtx ioctx;
-+
-+ void SetUp() override {
-+ ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
-+ ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
-+ }
-+ void TearDown() override {
-+ destroy_one_pool_pp(pool_name, rados);
-+ }
-+
-+ static std::string get_oid(int i) {
-+ return fmt::format("shard.{}", i);
-+ }
-+
-+ void make_omap() {
-+ for (int i = 0; i < SHARDS; ++i) {
-+ using ceph::encode;
-+ lr::ObjectWriteOperation op;
-+ cb::list bl;
-+ encode(i, bl);
-+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
-+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
-+ ASSERT_GE(r, 0);
-+ }
-+ }
-+
-+ void add_omap(int i) {
-+ using ceph::encode;
-+ lr::ObjectWriteOperation op;
-+ cb::list bl;
-+ encode(i, bl);
-+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
-+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
-+ ASSERT_GE(r, 0);
-+ }
-+
-+ void empty_omap() {
-+ for (int i = 0; i < SHARDS; ++i) {
-+ auto oid = get_oid(i);
-+ std::string to_marker;
-+ {
-+ lr::ObjectReadOperation op;
-+ std::list<cls_log_entry> entries;
-+ bool truncated = false;
-+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
-+ ASSERT_GE(r, 0);
-+ ASSERT_FALSE(entries.empty());
-+ }
-+ {
-+ lr::ObjectWriteOperation op;
-+ cls_log_trim(op, {}, {}, {}, to_marker);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
-+ ASSERT_GE(r, 0);
-+ }
-+ {
-+ lr::ObjectReadOperation op;
-+ std::list<cls_log_entry> entries;
-+ bool truncated = false;
-+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
-+ ASSERT_GE(r, 0);
-+ ASSERT_TRUE(entries.empty());
-+ }
-+ }
-+ }
-+
-+ void make_fifo()
-+ {
-+ for (int i = 0; i < SHARDS; ++i) {
-+ std::unique_ptr<RCf::FIFO> fifo;
-+ auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
-+ ASSERT_EQ(0, r);
-+ ASSERT_TRUE(fifo);
-+ }
-+ }
-+
-+ void add_fifo(int i)
-+ {
-+ using ceph::encode;
-+ std::unique_ptr<RCf::FIFO> fifo;
-+ auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
-+ ASSERT_GE(0, r);
-+ ASSERT_TRUE(fifo);
-+ cb::list bl;
-+ encode(i, bl);
-+ r = fifo->push(bl, null_yield);
-+ ASSERT_GE(0, r);
-+ }
-+
-+ void assert_empty() {
-+ std::vector<lr::ObjectItem> result;
-+ lr::ObjectCursor next;
-+ auto r = ioctx.object_list(ioctx.object_list_begin(), ioctx.object_list_end(),
-+ 100, {}, &result, &next);
-+ ASSERT_GE(r, 0);
-+ ASSERT_TRUE(result.empty());
-+ }
-+};
-+
-+TEST_F(LogBacking, TestOmap)
-+{
-+ make_omap();
-+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-+ get_oid, null_yield);
-+ ASSERT_EQ(log_type::omap, *stat);
-+}
-+
-+TEST_F(LogBacking, TestOmapEmpty) // With nothing created by the test, the omap default passed in must be what is reported.
-+{
-+ auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
-+ get_oid, null_yield);
-+ ASSERT_EQ(log_type::omap, *stat); // NOTE(review): stat is dereferenced without first checking it holds a value
-+}
-+
-+TEST_F(LogBacking, TestFIFO) // After make_fifo() has created a FIFO per shard, detection must report fifo.
-+{
-+ make_fifo();
-+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-+ get_oid, null_yield);
-+ ASSERT_EQ(log_type::fifo, *stat); // NOTE(review): stat is dereferenced without first checking it holds a value
-+}
-+
-+TEST_F(LogBacking, TestFIFOEmpty) // With nothing created by the test, the fifo default passed in must be what is reported.
-+{
-+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-+ get_oid, null_yield);
-+ ASSERT_EQ(log_type::fifo, *stat); // NOTE(review): stat is dereferenced without first checking it holds a value
-+}
-
-From 8c81b6fa1b2a0f1d409afbd0126d18cfc97315c4 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 15:45:12 -0500
-Subject: [PATCH 04/26] rgw: Use refactored log backing tools
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit da6223d281e33e43fa74c50f4d0eedb5ac25ace4)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/common/options.cc | 16 ++--
- src/rgw/rgw_datalog.cc | 208 +++++------------------------------------
- src/rgw/rgw_datalog.h | 5 +-
- 3 files changed, 31 insertions(+), 198 deletions(-)
-
-diff --git a/src/common/options.cc b/src/common/options.cc
-index 75d6589c08296..8fdd62fb14ccb 100644
---- a/src/common/options.cc
-+++ b/src/common/options.cc
-@@ -7407,17 +7407,15 @@ std::vector<Option> get_rgw_options() {
- .add_see_also("rgw_dmclock_metadata_res")
- .add_see_also("rgw_dmclock_metadata_wgt"),
-
-- Option("rgw_data_log_backing", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-- .set_default("auto")
-- .set_enum_allowed( { "auto", "fifo", "omap" } )
-- .set_description("Backing store for the RGW data sync log")
-+ Option("rgw_default_data_log_backing", Option::TYPE_STR,
Option::LEVEL_ADVANCED)
-+ .set_default("fifo")
-+ .set_enum_allowed( { "fifo", "omap" } )
-+ .set_description("Default backing store for the RGW data sync log")
- .set_long_description(
- "Whether to use the older OMAP backing store or the high performance
"
-- "FIFO based backing store. Auto uses whatever already exists "
-- "but will default to FIFO if there isn't an existing log. Either of "
-- "the explicit options will cause startup to fail if the other log is "
-- "still around."),
--
-+ "FIFO based backing store by default. This only covers the creation of "
-+ "the log on startup if none exists."),
-+
- Option("rgw_luarocks_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
- .set_flag(Option::FLAG_STARTUP)
- #ifdef WITH_RADOSGW_LUA_PACKAGES
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 8142b26e01a8b..d6a9d210d1b56 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -14,6 +14,7 @@
-
- #include "cls_fifo_legacy.h"
- #include "rgw_datalog.h"
-+#include "rgw_log_backing.h"
- #include "rgw_tools.h"
-
- #define dout_context g_ceph_context
-@@ -67,38 +68,6 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
- JSONDecoder::decode_json("entry", entry, obj);
- }
-
--int RGWDataChangesBE::remove(CephContext* cct, librados::Rados* rados,
-- const rgw_pool& log_pool)
--{
-- auto num_shards = cct->_conf->rgw_data_log_num_shards;
-- librados::IoCtx ioctx;
-- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
-- false, false);
-- if (r < 0) {
-- if (r == -ENOENT) {
-- return 0;
-- } else {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": rgw_init_ioctx failed: " << log_pool.name
-- << ": " << cpp_strerror(-r) << dendl;
-- return r;
-- }
-- }
-- for (auto i = 0; i < num_shards; ++i) {
-- auto oid = get_oid(cct, i);
-- librados::ObjectWriteOperation op;
-- op.remove();
-- auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
-- if (r < 0 && r != -ENOENT) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": remove failed: " << log_pool.name << "/"
<< oid
-- << ": " << cpp_strerror(-r) << dendl;
-- }
-- }
-- return 0;
--}
--
--
- class RGWDataChangesOmap final : public RGWDataChangesBE {
- using centries = std::list<cls_log_entry>;
- RGWSI_Cls& cls;
-@@ -113,44 +82,6 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- }
- }
- ~RGWDataChangesOmap() override = default;
-- static int exists(CephContext* cct, RGWSI_Cls& cls, bool* exists,
-- bool* has_entries) {
-- auto num_shards = cct->_conf->rgw_data_log_num_shards;
-- std::string out_marker;
-- bool truncated = false;
-- std::list<cls_log_entry> log_entries;
-- const cls_log_header empty_info;
-- *exists = false;
-- *has_entries = false;
-- for (auto i = 0; i < num_shards; ++i) {
-- cls_log_header info;
-- auto oid = get_oid(cct, i);
-- auto r = cls.timelog.info(oid, &info, null_yield);
-- if (r < 0 && r != -ENOENT) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": failed to get info " << oid << ": "
<< cpp_strerror(-r)
-- << dendl;
-- return r;
-- } else if ((r == -ENOENT) || (info == empty_info)) {
-- continue;
-- }
-- *exists = true;
-- r = cls.timelog.list(oid, {}, {}, 100, log_entries, "",
&out_marker,
-- &truncated, null_yield);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": failed to list " << oid << ": "
<< cpp_strerror(-r)
-- << dendl;
-- return r;
-- } else if (!log_entries.empty()) {
-- *has_entries = true;
-- break; // No reason to continue, once we have both existence
-- // AND non-emptiness
-- }
-- }
-- return 0;
-- }
--
- void prepare(ceph::real_time ut, const std::string& key,
- ceph::buffer::list&& entry, entries& out) override {
- if (!std::holds_alternative<centries>(out)) {
-@@ -294,54 +225,6 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- }));
- }
- ~RGWDataChangesFIFO() override = default;
-- static int exists(CephContext* cct, librados::Rados* rados,
-- const rgw_pool& log_pool, bool* exists, bool* has_entries) {
-- auto num_shards = cct->_conf->rgw_data_log_num_shards;
-- librados::IoCtx ioctx;
-- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
-- false, false);
-- if (r < 0) {
-- if (r == -ENOENT) {
-- return 0;
-- } else {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": rgw_init_ioctx failed: " << log_pool.name
-- << ": " << cpp_strerror(-r) << dendl;
-- return r;
-- }
-- }
-- *exists = false;
-- *has_entries = false;
-- for (auto i = 0; i < num_shards; ++i) {
-- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-- auto oid = get_oid(cct, i);
-- std::vector<rgw::cls::fifo::list_entry> log_entries;
-- bool more = false;
-- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
-- &fifo, null_yield,
-- std::nullopt, true);
-- if (r == -ENOENT || r == -ENODATA) {
-- continue;
-- } else if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": unable to open FIFO: " << log_pool <<
"/" << oid
-- << ": " << cpp_strerror(-r) << dendl;
-- return r;
-- }
-- *exists = true;
-- r = fifo->list(1, nullopt, &log_entries, &more,
-- null_yield);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": unable to list entries: " << log_pool <<
"/" << oid
-- << ": " << cpp_strerror(-r) << dendl;
-- } else if (!log_entries.empty()) {
-- *has_entries = true;
-- break;
-- }
-- }
-- return 0;
-- }
- void prepare(ceph::real_time, const std::string&,
- ceph::buffer::list&& entry, entries& out) override {
- if (!std::holds_alternative<centries>(out)) {
-@@ -490,83 +373,38 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- RGWSI_Cls *cls, librados::Rados* lr)
- {
- zone = _zone;
-- assert(zone);
-- auto backing =
cct->_conf.get_val<std::string>("rgw_data_log_backing");
-+ ceph_assert(zone);
-+ auto defbacking = to_log_type(
-+
cct->_conf.get_val<std::string>("rgw_default_data_log_backing"));
- // Should be guaranteed by `set_enum_allowed`
-- ceph_assert(backing == "auto" || backing == "fifo" || backing ==
"omap");
-+ ceph_assert(defbacking);
- auto log_pool = zoneparams.log_pool;
-- bool omapexists = false, omaphasentries = false;
-- auto r = RGWDataChangesOmap::exists(cct, *cls, &omapexists, &omaphasentries);
-+ auto r = rgw_init_ioctx(lr, log_pool, ioctx, true, false);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Error when checking for existing Omap datalog backend: "
-- << cpp_strerror(-r) << dendl;
-+ << ": Failed to initialized ioctx, r=" << r
-+ << ", pool=" << log_pool << dendl;
-+ return -r;
- }
-- bool fifoexists = false, fifohasentries = false;
-- r = RGWDataChangesFIFO::exists(cct, lr, log_pool, &fifoexists,
&fifohasentries);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Error when checking for existing FIFO datalog backend: "
-- << cpp_strerror(-r) << dendl;
-- }
-- bool has_entries = omaphasentries || fifohasentries;
-- bool remove = false;
-+ auto found = log_backing_type(ioctx, *defbacking, num_shards,
-+ [this](int i) {
-+ return RGWDataChangesBE::get_oid(cct, i);
-+ },
-+ null_yield);
-
-- if (omapexists && fifoexists) {
-- if (has_entries) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Both Omap and FIFO backends exist, cannot continue."
-- << dendl;
-- return -EINVAL;
-- }
-- ldout(cct, 0)
-- << __PRETTY_FUNCTION__
-- << ": Both Omap and FIFO backends exist, but are empty. Will
remove."
-- << dendl;
-- remove = true;
-- }
-- if (backing == "omap" && fifoexists) {
-- if (has_entries) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Omap requested, but FIFO backend exists, cannot continue."
-- << dendl;
-- return -EINVAL;
-- }
-- ldout(cct, 0) << __PRETTY_FUNCTION__
-- << ": Omap requested, FIFO exists, but is empty. Deleting."
-- << dendl;
-- remove = true;
-- }
-- if (backing == "fifo" && omapexists) {
-- if (has_entries) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": FIFO requested, but Omap backend exists, cannot continue."
-- << dendl;
-- return -EINVAL;
-- }
-- ldout(cct, 0) << __PRETTY_FUNCTION__
-- << ": FIFO requested, Omap exists, but is empty. Deleting."
-- << dendl;
-- remove = true;
-- }
--
-- if (remove) {
-- r = RGWDataChangesBE::remove(cct, lr, log_pool);
-- if (r < 0) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": remove failed, cannot continue."
-- << dendl;
-- return r;
-- }
-- omapexists = false;
-- fifoexists = false;
-+ if (!found) {
-+ lderr(cct) << __PRETTY_FUNCTION__
-+ << ": Error when checking log type: "
-+ << found.error().message() << dendl;
- }
--
- try {
-- if (backing == "omap" || (backing == "auto" &&
omapexists)) {
-+ switch (*found) {
-+ case log_type::omap:
- be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
-- } else if (backing != "omap") {
-+ break;
-+ case log_type::fifo:
- be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
-+ break;
- }
- } catch (bs::system_error& e) {
- lderr(cct) << __PRETTY_FUNCTION__
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index 5440b3d1e4ba8..af5f4f0276a68 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -142,10 +142,6 @@ class RGWDataChangesBE {
- std::string get_oid(int i) {
- return fmt::format("{}.{}", prefix, i);
- }
-- static int remove(CephContext* cct, librados::Rados* rados,
-- const rgw_pool& log_pool);
--
--
- virtual void prepare(ceph::real_time now,
- const std::string& key,
- ceph::buffer::list&& entry,
-@@ -167,6 +163,7 @@ class RGWDataChangesBE {
-
- class RGWDataChangesLog {
- CephContext *cct;
-+ librados::IoCtx ioctx;
- rgw::BucketChangeObserver *observer = nullptr;
- const RGWZone* zone;
- std::unique_ptr<RGWDataChangesBE> be;
-
-From 57a76a06c75f60a8bb6d570c599eb40e15f93df2 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 17:05:04 -0500
-Subject: [PATCH 05/26] rgw/datalog: Pass IoCtx in, don't have each backend
- make its own
-
-Also don't use svc_cls.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 7f097cf8db433bb4c82a9bafc44e43b84f79bca4)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 68 ++++++++++++++++++++----------------------
- src/rgw/rgw_datalog.h | 10 +++----
- src/rgw/rgw_service.cc | 2 +-
- 3 files changed, 38 insertions(+), 42 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index d6a9d210d1b56..92ad1869d3f48 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -11,6 +11,7 @@
- #include "common/async/librados_completion.h"
-
- #include "cls/fifo/cls_fifo_types.h"
-+#include "cls/log/cls_log_client.h"
-
- #include "cls_fifo_legacy.h"
- #include "rgw_datalog.h"
-@@ -21,6 +22,7 @@
- static constexpr auto dout_subsys = ceph_subsys_rgw;
-
- namespace bs = boost::system;
-+namespace lr = librados;
-
- void rgw_data_change::dump(ceph::Formatter *f) const
- {
-@@ -70,12 +72,10 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
-
- class RGWDataChangesOmap final : public RGWDataChangesBE {
- using centries = std::list<cls_log_entry>;
-- RGWSI_Cls& cls;
- std::vector<std::string> oids;
- public:
-- RGWDataChangesOmap(CephContext* cct, RGWSI_Cls& cls)
-- : RGWDataChangesBE(cct), cls(cls) {
-- auto num_shards = cct->_conf->rgw_data_log_num_shards;
-+ RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
-+ : RGWDataChangesBE(ioctx) {
- oids.reserve(num_shards);
- for (auto i = 0; i < num_shards; ++i) {
- oids.push_back(get_oid(i));
-@@ -90,12 +90,13 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- }
-
- cls_log_entry e;
-- cls.timelog.prepare_entry(e, ut, {}, key, entry);
-+ cls_log_add_prepare_entry(e, utime_t(ut), {}, key, entry);
- std::get<centries>(out).push_back(std::move(e));
- }
- int push(int index, entries&& items) override {
-- auto r = cls.timelog.add(oids[index], std::get<centries>(items),
-- nullptr, true, null_yield);
-+ lr::ObjectWriteOperation op;
-+ cls_log_add(op, std::get<centries>(items), true);
-+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to push to " << oids[index] <<
cpp_strerror(-r)
-@@ -106,7 +107,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- int push(int index, ceph::real_time now,
- const std::string& key,
- ceph::buffer::list&& bl) override {
-- auto r = cls.timelog.add(oids[index], now, {}, key, bl, null_yield);
-+ lr::ObjectWriteOperation op;
-+ cls_log_add(op, utime_t(now), {}, key, bl);
-+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to push to " << oids[index]
-@@ -119,10 +122,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- std::optional<std::string_view> marker,
- std::string* out_marker, bool* truncated) override {
- std::list<cls_log_entry> log_entries;
-- auto r = cls.timelog.list(oids[index], {}, {},
-- max_entries, log_entries,
-- std::string(marker.value_or("")),
-- out_marker, truncated, null_yield);
-+ lr::ObjectReadOperation op;
-+ cls_log_list(op, {}, {}, std::string(marker.value_or("")),
-+ max_entries, log_entries, out_marker, truncated);
-+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
- if (r == -ENOENT) {
- *truncated = false;
- return 0;
-@@ -153,7 +156,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- }
- int get_info(int index, RGWDataChangesLogInfo *info) override {
- cls_log_header header;
-- auto r = cls.timelog.info(oids[index], &header, null_yield);
-+ lr::ObjectReadOperation op;
-+ cls_log_info(op, &header);
-+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
- if (r == -ENOENT) r = 0;
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
-@@ -166,10 +171,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- return r;
- }
- int trim(int index, std::string_view marker) override {
-- auto r = cls.timelog.trim(oids[index], {}, {},
-- {}, std::string(marker), nullptr,
-- null_yield);
--
-+ lr::ObjectWriteOperation op;
-+ cls_log_trim(op, {}, {}, {}, std::string(marker));
-+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
- if (r == -ENOENT) r = 0;
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
-@@ -179,10 +183,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- return r;
- }
- int trim(int index, std::string_view marker,
-- librados::AioCompletion* c) override {
-- auto r = cls.timelog.trim(oids[index], {}, {},
-- {}, std::string(marker), c, null_yield);
--
-+ lr::AioCompletion* c) override {
-+ lr::ObjectWriteOperation op;
-+ cls_log_trim(op, {}, {}, {}, std::string(marker));
-+ auto r = ioctx.aio_operate(oids[index], c, &op, 0);
- if (r == -ENOENT) r = 0;
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
-@@ -200,20 +204,12 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- using centries = std::vector<ceph::buffer::list>;
- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
- public:
-- RGWDataChangesFIFO(CephContext* cct, librados::Rados* rados,
-- const rgw_pool& log_pool)
-- : RGWDataChangesBE(cct) {
-- librados::IoCtx ioctx;
-- auto shards = cct->_conf->rgw_data_log_num_shards;
-- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
-- true, false);
-- if (r < 0) {
-- throw bs::system_error(ceph::to_error_code(r));
-- }
-+ RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
-+ : RGWDataChangesBE(ioctx) {
- fifos.resize(shards);
- for (auto i = 0; i < shards; ++i) {
-- r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
-- &fifos[i], null_yield);
-+ auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
-+ &fifos[i], null_yield);
- if (r < 0) {
- throw bs::system_error(ceph::to_error_code(r));
- }
-@@ -370,7 +366,7 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
-
- int RGWDataChangesLog::start(const RGWZone* _zone,
- const RGWZoneParams& zoneparams,
-- RGWSI_Cls *cls, librados::Rados* lr)
-+ librados::Rados* lr)
- {
- zone = _zone;
- ceph_assert(zone);
-@@ -400,10 +396,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- try {
- switch (*found) {
- case log_type::omap:
-- be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
-+ be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
- break;
- case log_type::fifo:
-- be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
-+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
- break;
- }
- } catch (bs::system_error& e) {
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index af5f4f0276a68..f6f52382f0947 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -37,8 +37,6 @@
- #include "rgw_zone.h"
- #include "rgw_trim_bilog.h"
-
--#include "services/svc_cls.h"
--
- namespace bc = boost::container;
-
- enum DataLogEntityType {
-@@ -118,6 +116,7 @@ struct RGWDataChangesLogMarker {
-
- class RGWDataChangesBE {
- protected:
-+ librados::IoCtx& ioctx;
- CephContext* const cct;
- private:
- std::string prefix;
-@@ -132,8 +131,9 @@ class RGWDataChangesBE {
- using entries = std::variant<std::list<cls_log_entry>,
- std::vector<ceph::buffer::list>>;
-
-- RGWDataChangesBE(CephContext* const cct)
-- : cct(cct), prefix(get_prefix(cct)) {}
-+ RGWDataChangesBE(librados::IoCtx& ioctx)
-+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
-+ prefix(get_prefix(cct)) {}
- virtual ~RGWDataChangesBE() = default;
-
- static std::string get_oid(CephContext* cct, int i) {
-@@ -214,7 +214,7 @@ class RGWDataChangesLog {
- ~RGWDataChangesLog();
-
- int start(const RGWZone* _zone, const RGWZoneParams& zoneparams,
-- RGWSI_Cls *cls_svc, librados::Rados* lr);
-+ librados::Rados* lr);
-
- int add_entry(const RGWBucketInfo& bucket_info, int shard_id);
- int get_log_shard_id(rgw_bucket& bucket, int shard_id);
-diff --git a/src/rgw/rgw_service.cc b/src/rgw/rgw_service.cc
-index 3fb4f2b0b6413..7c7d8a02675d4 100644
---- a/src/rgw/rgw_service.cc
-+++ b/src/rgw/rgw_service.cc
-@@ -141,7 +141,7 @@ int RGWServices_Def::init(CephContext *cct,
- }
-
- r = datalog_rados->start(&zone->get_zone(),
-- zone->get_zone_params(), cls.get(),
-+ zone->get_zone_params(),
- rados->get_rados_handle());
- if (r < 0) {
- ldout(cct, 0) << "ERROR: failed to start datalog_rados service ("
<< cpp_strerror(-r) << dendl;
-
-From 665829501df70d80d7aa3c2227bfefb363f5b7bc Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 18:20:57 -0500
-Subject: [PATCH 06/26] rgw: Move get_oid back to RGWDataChangesLog
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit bdd3528e54e399135f602e1f7e94d070d89b8c99)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 32 +++++++++++++++++++++-----------
- src/rgw/rgw_datalog.h | 28 ++++++++++++----------------
- 2 files changed, 33 insertions(+), 27 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 92ad1869d3f48..9fc2fff83c103 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -73,9 +73,14 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
- class RGWDataChangesOmap final : public RGWDataChangesBE {
- using centries = std::list<cls_log_entry>;
- std::vector<std::string> oids;
-+ std::string get_oid(int i) const {
-+ return datalog.get_oid(i);
-+ }
- public:
-- RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
-- : RGWDataChangesBE(ioctx) {
-+ RGWDataChangesOmap(lr::IoCtx& ioctx,
-+ RGWDataChangesLog& datalog,
-+ int num_shards)
-+ : RGWDataChangesBE(ioctx, datalog) {
- oids.reserve(num_shards);
- for (auto i = 0; i < num_shards; ++i) {
- oids.push_back(get_oid(i));
-@@ -203,9 +208,14 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- class RGWDataChangesFIFO final : public RGWDataChangesBE {
- using centries = std::vector<ceph::buffer::list>;
- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
-+ std::string get_oid(int i) const {
-+ return datalog.get_oid(i);
-+ }
- public:
-- RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
-- : RGWDataChangesBE(ioctx) {
-+ RGWDataChangesFIFO(lr::IoCtx& ioctx,
-+ RGWDataChangesLog& datalog,
-+ int shards)
-+ : RGWDataChangesBE(ioctx, datalog) {
- fifos.resize(shards);
- for (auto i = 0; i < shards; ++i) {
- auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
-@@ -362,6 +372,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
- : cct(cct),
- num_shards(cct->_conf->rgw_data_log_num_shards),
-+ prefix(get_prefix()),
- changes(cct->_conf->rgw_data_log_changes_size) {}
-
- int RGWDataChangesLog::start(const RGWZone* _zone,
-@@ -382,11 +393,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- << ", pool=" << log_pool << dendl;
- return -r;
- }
-+
- auto found = log_backing_type(ioctx, *defbacking, num_shards,
-- [this](int i) {
-- return RGWDataChangesBE::get_oid(cct, i);
-- },
-- null_yield);
-+ [this](int i) { return get_oid(i); },
-+ null_yield);
-
- if (!found) {
- lderr(cct) << __PRETTY_FUNCTION__
-@@ -396,10 +406,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- try {
- switch (*found) {
- case log_type::omap:
-- be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
-+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
- break;
- case log_type::fifo:
-- be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
-+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
- break;
- }
- } catch (bs::system_error& e) {
-@@ -521,7 +531,7 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
- }
-
- std::string RGWDataChangesLog::get_oid(int i) const {
-- return be->get_oid(i);
-+ return fmt::format("{}.{}", prefix, i);
- }
-
- int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index f6f52382f0947..387d50a1d4964 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -20,6 +20,7 @@
-
- #include "include/buffer.h"
- #include "include/encoding.h"
-+#include "include/function2.hpp"
-
- #include "include/rados/librados.hpp"
-
-@@ -114,34 +115,24 @@ struct RGWDataChangesLogMarker {
- RGWDataChangesLogMarker() = default;
- };
-
-+class RGWDataChangesLog;
-+
- class RGWDataChangesBE {
- protected:
- librados::IoCtx& ioctx;
- CephContext* const cct;
-+ RGWDataChangesLog& datalog;
- private:
-- std::string prefix;
-- static std::string_view get_prefix(CephContext* cct) {
-- std::string_view prefix = cct->_conf->rgw_data_log_obj_prefix;
-- if (prefix.empty()) {
-- prefix = "data_log"sv;
-- }
-- return prefix;
-- }
- public:
- using entries = std::variant<std::list<cls_log_entry>,
- std::vector<ceph::buffer::list>>;
-
-- RGWDataChangesBE(librados::IoCtx& ioctx)
-+ RGWDataChangesBE(librados::IoCtx& ioctx,
-+ RGWDataChangesLog& datalog)
- : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
-- prefix(get_prefix(cct)) {}
-+ datalog(datalog) {}
- virtual ~RGWDataChangesBE() = default;
-
-- static std::string get_oid(CephContext* cct, int i) {
-- return fmt::format("{}.{}", get_prefix(cct), i);
-- }
-- std::string get_oid(int i) {
-- return fmt::format("{}.{}", prefix, i);
-- }
- virtual void prepare(ceph::real_time now,
- const std::string& key,
- ceph::buffer::list&& entry,
-@@ -169,6 +160,11 @@ class RGWDataChangesLog {
- std::unique_ptr<RGWDataChangesBE> be;
-
- const int num_shards;
-+ std::string get_prefix() { // Object-name prefix for data log shards: the configured value, else "data_log".
-+ auto prefix = cct->_conf->rgw_data_log_obj_prefix;
-+ return prefix.empty() ? "data_log"s : prefix; // operands were swapped: an empty option yielded "" and a set one was ignored
-+ }
-+ std::string prefix;
-
- ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
- ceph::shared_mutex modified_lock =
-
-From 504b024fa9f4cb054109c00e527eb0dc08b9b4ce Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sun, 3 Jan 2021 18:32:50 -0500
-Subject: [PATCH 07/26] rgw/datalog: make get_oid take generation
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit f7b850f7aa84d9cf24b4eaebbe51c7ee221bbd44)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 27 +++++++------
- src/rgw/rgw_datalog.h | 78 ++++++++++++++++++++-----------------
- src/rgw/rgw_trim_datalog.cc | 2 +-
- 3 files changed, 57 insertions(+), 50 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 9fc2fff83c103..329657d463125 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -73,14 +73,13 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
- class RGWDataChangesOmap final : public RGWDataChangesBE {
- using centries = std::list<cls_log_entry>;
- std::vector<std::string> oids;
-- std::string get_oid(int i) const {
-- return datalog.get_oid(i);
-- }
-+
- public:
- RGWDataChangesOmap(lr::IoCtx& ioctx,
- RGWDataChangesLog& datalog,
-+ uint64_t gen_id,
- int num_shards)
-- : RGWDataChangesBE(ioctx, datalog) {
-+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
- oids.reserve(num_shards);
- for (auto i = 0; i < num_shards; ++i) {
- oids.push_back(get_oid(i));
-@@ -208,14 +207,12 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- class RGWDataChangesFIFO final : public RGWDataChangesBE {
- using centries = std::vector<ceph::buffer::list>;
- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
-- std::string get_oid(int i) const {
-- return datalog.get_oid(i);
-- }
-+
- public:
- RGWDataChangesFIFO(lr::IoCtx& ioctx,
- RGWDataChangesLog& datalog,
-- int shards)
-- : RGWDataChangesBE(ioctx, datalog) {
-+ uint64_t gen_id, int shards)
-+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
- fifos.resize(shards);
- for (auto i = 0; i < shards; ++i) {
- auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
-@@ -395,7 +392,7 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- }
-
- auto found = log_backing_type(ioctx, *defbacking, num_shards,
-- [this](int i) { return get_oid(i); },
-+ [this](int i) { return get_oid(0, i); },
- null_yield);
-
- if (!found) {
-@@ -406,10 +403,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- try {
- switch (*found) {
- case log_type::omap:
-- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
-+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
- break;
- case log_type::fifo:
-- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
-+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
- break;
- }
- } catch (bs::system_error& e) {
-@@ -530,8 +527,10 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
- return bucket_filter(bucket, y);
- }
-
--std::string RGWDataChangesLog::get_oid(int i) const {
-- return fmt::format("{}.{}", prefix, i);
-+std::string RGWDataChangesLog::get_oid(uint64_t gen_id, int i) const { // Object name for shard i of generation gen_id.
-+ return (gen_id > 0 ?
-+ fmt::format("{}@G{}.{}", prefix, gen_id, i) : // non-zero generation: "<prefix>@G<gen_id>.<i>"
-+ fmt::format("{}.{}", prefix, i)); // generation 0 keeps the untagged "<prefix>.<i>" form
- }
-
- int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index 387d50a1d4964..2a73237b38d2d 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -117,40 +117,7 @@ struct RGWDataChangesLogMarker {
-
- class RGWDataChangesLog;
-
--class RGWDataChangesBE {
--protected:
-- librados::IoCtx& ioctx;
-- CephContext* const cct;
-- RGWDataChangesLog& datalog;
--private:
--public:
-- using entries = std::variant<std::list<cls_log_entry>,
-- std::vector<ceph::buffer::list>>;
--
-- RGWDataChangesBE(librados::IoCtx& ioctx,
-- RGWDataChangesLog& datalog)
-- : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
-- datalog(datalog) {}
-- virtual ~RGWDataChangesBE() = default;
--
-- virtual void prepare(ceph::real_time now,
-- const std::string& key,
-- ceph::buffer::list&& entry,
-- entries& out) = 0;
-- virtual int push(int index, entries&& items) = 0;
-- virtual int push(int index, ceph::real_time now,
-- const std::string& key,
-- ceph::buffer::list&& bl) = 0;
-- virtual int list(int shard, int max_entries,
-- std::vector<rgw_data_change_log_entry>& entries,
-- std::optional<std::string_view> marker,
-- std::string* out_marker, bool* truncated) = 0;
-- virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0;
-- virtual int trim(int index, std::string_view marker) = 0;
-- virtual int trim(int index, std::string_view marker,
-- librados::AioCompletion* c) = 0;
-- virtual std::string_view max_marker() const = 0;
--};
-+class RGWDataChangesBE;
-
- class RGWDataChangesLog {
- CephContext *cct;
-@@ -247,7 +214,48 @@ class RGWDataChangesLog {
- }
- // a marker that compares greater than any other
- std::string_view max_marker() const;
-- std::string get_oid(int shard_id) const;
-+ std::string get_oid(uint64_t gen_id, int shard_id) const;
-+};
-+
-+class RGWDataChangesBE { // Abstract interface for one data-changes-log backend; the omap and FIFO backends derive from it.
-+protected:
-+ librados::IoCtx& ioctx; // held by reference, not copied
-+ CephContext* const cct; // taken from ioctx.cct() in the constructor
-+ RGWDataChangesLog& datalog; // log this backend belongs to; used to build shard object names
-+ uint64_t gen_id; // generation passed through to datalog.get_oid()
-+
-+ std::string get_oid(int shard_id) { // Object name of shard_id within this backend's generation.
-+ return datalog.get_oid(gen_id, shard_id);
-+ }
-+public:
-+ using entries = std::variant<std::list<cls_log_entry>, // batch type: a list of cls_log_entry ...
-+ std::vector<ceph::buffer::list>>; // ... or a vector of raw buffer lists
-+
-+ RGWDataChangesBE(librados::IoCtx& ioctx,
-+ RGWDataChangesLog& datalog,
-+ uint64_t gen_id)
-+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
-+ datalog(datalog), gen_id(gen_id) {}
-+ virtual ~RGWDataChangesBE() = default;
-+
-+ virtual void prepare(ceph::real_time now, // appends one entry to the batch in `out`
-+ const std::string& key,
-+ ceph::buffer::list&& entry,
-+ entries& out) = 0;
-+ virtual int push(int index, entries&& items) = 0; // writes a prepared batch to shard `index`
-+ virtual int push(int index, ceph::real_time now, // writes a single entry to shard `index`
-+ const std::string& key,
-+ ceph::buffer::list&& bl) = 0;
-+ virtual int list(int shard, int max_entries, // reads up to max_entries entries of `shard`
-+ std::vector<rgw_data_change_log_entry>& entries,
-+ std::optional<std::string_view> marker,
-+ std::string* out_marker, bool* truncated) = 0;
-+ virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0; // fills *info for shard `index`
-+ virtual int trim(int index, std::string_view marker) = 0; // trims shard `index` using `marker`
-+ virtual int trim(int index, std::string_view marker, // same, issued with an AioCompletion
-+ librados::AioCompletion* c) = 0;
-+ virtual std::string_view max_marker() const = 0; // backend's marker that compares greater than any other
- };
-
-+
- #endif
-diff --git a/src/rgw/rgw_trim_datalog.cc b/src/rgw/rgw_trim_datalog.cc
-index 62f6c07d17205..85c19a7c4437b 100644
---- a/src/rgw/rgw_trim_datalog.cc
-+++ b/src/rgw/rgw_trim_datalog.cc
-@@ -202,7 +202,7 @@ class DataLogTrimPollCR : public RGWCoroutine {
- int num_shards, utime_t interval)
- : RGWCoroutine(store->ctx()), store(store), http(http),
- num_shards(num_shards), interval(interval),
-- lock_oid(store->svc()->datalog_rados->get_oid(0)),
-+ lock_oid(store->svc()->datalog_rados->get_oid(0, 0)),
- lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)),
- last_trim(num_shards)
- {}
-
-From 1436be5861c8a19bd4969c219fb2a8848f359a92 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sun, 3 Jan 2021 19:08:09 -0500
-Subject: [PATCH 08/26] rgw: Logback generation data structures
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit b97b207928c60b48fe405ab38be15ba55f927d5c)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.h | 62 ++++++++++++++++++++++++++++++++
- src/test/rgw/test_log_backing.cc | 18 ++++++++++
- 2 files changed, 80 insertions(+)
-
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index d769af48b01fe..8546370a3757a 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -13,11 +13,18 @@
-
- #include <boost/system/error_code.hpp>
-
-+#undef FMT_HEADER_ONLY
-+#define FMT_HEADER_ONLY 1
-+#include <fmt/format.h>
-+
- #include "include/rados/librados.hpp"
-+#include "include/encoding.h"
- #include "include/expected.hpp"
- #include "include/function2.hpp"
-
- #include "common/async/yield_context.h"
-+#include "common/Formatter.h"
-+#include "common/strtol.h"
-
- namespace bs = boost::system;
-
-@@ -28,6 +35,17 @@ enum class log_type {
- fifo = 1
- };
-
-+inline void encode(const log_type& type, ceph::buffer::list& bl) {
-+ auto t = static_cast<uint8_t>(type);
-+ encode(t, bl);
-+}
-+
-+inline void decode(log_type& type, bufferlist::const_iterator& bl) {
-+ uint8_t t;
-+ decode(t, bl);
-+ type = static_cast<log_type>(type);
-+}
-+
- inline std::optional<log_type> to_log_type(std::string_view s) {
- if (strncasecmp(s.data(), "omap", s.length()) == 0) {
- return log_type::omap;
-@@ -67,4 +85,48 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
- optional_yield y);
-
-
-+struct logback_generation {
-+ uint64_t gen_id = 0;
-+ log_type type;
-+ bool empty = false;
-+
-+ void encode(ceph::buffer::list& bl) const {
-+ ENCODE_START(1, 1, bl);
-+ encode(gen_id, bl);
-+ encode(type, bl);
-+ encode(empty, bl);
-+ ENCODE_FINISH(bl);
-+ }
-+
-+ void decode(bufferlist::const_iterator& bl) {
-+ DECODE_START(1, bl);
-+ decode(gen_id, bl);
-+ decode(type, bl);
-+ decode(empty, bl);
-+ DECODE_FINISH(bl);
-+ }
-+};
-+WRITE_CLASS_ENCODER(logback_generation)
-+
-+inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
-+ return (gen_id > 0 ?
-+ fmt::format("G{:0>20}@{}", gen_id, cursor) :
-+ std::string(cursor));
-+}
-+
-+inline std::pair<uint64_t, std::string_view>
-+cursorgen(std::string_view cursor_) {
-+ std::string_view cursor = cursor_;
-+ if (cursor[0] != 'G') {
-+ return { 0, cursor };
-+ }
-+ cursor.remove_prefix(1);
-+ auto gen_id = ceph::consume<uint64_t>(cursor);
-+ if (!gen_id || cursor[0] != '@') {
-+ return { 0, cursor_ };
-+ }
-+ cursor.remove_prefix(1);
-+ return { *gen_id, cursor };
-+}
-+
- #endif
-diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
-index 5180d5fc74fe8..848bd6b50c4e5 100644
---- a/src/test/rgw/test_log_backing.cc
-+++ b/src/test/rgw/test_log_backing.cc
-@@ -174,3 +174,21 @@ TEST_F(LogBacking, TestFIFOEmpty)
- get_oid, null_yield);
- ASSERT_EQ(log_type::fifo, *stat);
- }
-+
-+TEST(CursorGen, RoundTrip) {
-+ const auto pcurs = "fded"sv;
-+ {
-+ auto gc = gencursor(0, pcurs);
-+ ASSERT_EQ(pcurs, gc);
-+ auto [gen, cursor] = cursorgen(gc);
-+ ASSERT_EQ(0, gen);
-+ ASSERT_EQ(pcurs, cursor);
-+ }
-+ {
-+ auto gc = gencursor(53, pcurs);
-+ ASSERT_NE(pcurs, gc);
-+ auto [gen, cursor] = cursorgen(gc);
-+ ASSERT_EQ(53, gen);
-+ ASSERT_EQ(pcurs, cursor);
-+ }
-+}
-
-From 59f53ba6a790d16c035c7fe5f5776f69ee6f5513 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 5 Jan 2021 20:00:07 -0500
-Subject: [PATCH 09/26] rgw: Generational support for logback switching
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 6b50f6d6def59e3c4b2db2d5311a887127b4804b)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.cc | 484 +++++++++++++++++++++++++++++++
- src/rgw/rgw_log_backing.h | 117 +++++++-
- src/test/rgw/test_log_backing.cc | 205 ++++++++++++-
- 3 files changed, 794 insertions(+), 12 deletions(-)
-
-diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
-index 63edf972a0307..eab60e672b9e8 100644
---- a/src/rgw/rgw_log_backing.cc
-+++ b/src/rgw/rgw_log_backing.cc
-@@ -2,11 +2,14 @@
- // vim: ts=8 sw=2 smarttab ft=cpp
-
- #include "cls/log/cls_log_client.h"
-+#include "cls/version/cls_version_client.h"
-
- #include "rgw_log_backing.h"
- #include "rgw_tools.h"
- #include "cls_fifo_legacy.h"
-
-+namespace cb = ceph::buffer;
-+
- static constexpr auto dout_subsys = ceph_subsys_rgw;
-
- enum class shard_check { dne, omap, fifo, corrupt };
-@@ -213,3 +216,484 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
- }
- return ec;
- }
-+
-+logback_generations::~logback_generations() {
-+ if (watchcookie > 0) {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ auto r = ioctx.unwatch2(watchcookie);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed unwatching oid=" << oid
-+ << ", r=" << r << dendl;
-+ }
-+ }
-+}
-+
-+bs::error_code logback_generations::setup(log_type def,
-+ optional_yield y) noexcept
-+{
-+ try {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ // First, read.
-+ auto res = read(y);
-+ if (!res && res.error() != bs::errc::no_such_file_or_directory) {
-+ return res.error();
-+ }
-+ if (res) {
-+ std::unique_lock lock(m);
-+ std::tie(entries_, version) = std::move(*res);
-+ } else {
-+ // Are we the first? Then create generation 0 and the generations
-+ // metadata.
-+ librados::ObjectWriteOperation op;
-+ auto type = log_backing_type(ioctx, def, shards,
-+ [this](int shard) {
-+ return this->get_oid(0, shard);
-+ }, y);
-+ if (!type)
-+ return type.error();
-+
-+ logback_generation l;
-+ l.type = *type;
-+
-+ std::unique_lock lock(m);
-+ version.ver = 1;
-+ static constexpr auto TAG_LEN = 24;
-+ version.tag.clear();
-+ append_rand_alpha(cct, version.tag, version.tag, TAG_LEN);
-+ op.create(true);
-+ cls_version_set(op, version);
-+ cb::list bl;
-+ entries_.emplace(0, std::move(l));
-+ encode(entries_, bl);
-+ lock.unlock();
-+
-+ op.write_full(bl);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, y);
-+ if (r < 0 && r != -EEXIST) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed writing oid=" << oid
-+ << ", r=" << r << dendl;
-+ bs::system_error(-r, bs::system_category());
-+ }
-+ // Did someone race us? Then re-read.
-+ if (r != 0) {
-+ res = read(y);
-+ if (!res)
-+ return res.error();
-+ if (res->first.empty())
-+ return bs::error_code(EIO, bs::system_category());
-+ auto l = res->first.begin()->second;
-+ // In the unlikely event that someone raced us, created
-+ // generation zero, incremented, then erased generation zero,
-+ // don't leave generation zero lying around.
-+ if (l.gen_id != 0) {
-+ auto ec = log_remove(ioctx, shards,
-+ [this](int shard) {
-+ return this->get_oid(0, shard);
-+ }, y);
-+ if (ec) return ec;
-+ }
-+ std::unique_lock lock(m);
-+ std::tie(entries_, version) = std::move(*res);
-+ }
-+ }
-+ // Pass all non-empty generations to the handler
-+ std::unique_lock lock(m);
-+ auto i = lowest_nomempty(entries_);
-+ entries_t e;
-+ std::copy(i, entries_.cend(),
-+ std::inserter(e, e.end()));
-+ m.unlock();
-+ auto ec = watch();
-+ if (ec) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed to re-establish watch, unsafe to continue: oid="
-+ << oid << ", ec=" << ec.message() << dendl;
-+ }
-+ return handle_init(std::move(e));
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+}
-+
-+bs::error_code logback_generations::update(optional_yield y) noexcept
-+{
-+ try {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ auto res = read(y);
-+ if (!res) {
-+ return res.error();
-+ }
-+
-+ std::unique_lock l(m);
-+ auto& [es, v] = *res;
-+ if (v == version) {
-+ // Nothing to do!
-+ return {};
-+ }
-+
-+ // Check consistency and prepare update
-+ if (es.empty()) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": INCONSISTENCY! Read empty update." << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+ auto cur_lowest = lowest_nomempty(entries_);
-+ // Straight up can't happen
-+ assert(cur_lowest != entries_.cend());
-+ auto new_lowest = lowest_nomempty(es);
-+ if (new_lowest == es.cend()) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": INCONSISTENCY! Read update with no active head." << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+ if (new_lowest->first < cur_lowest->first) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": INCONSISTENCY! Tail moved wrong way." << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+
-+ std::optional<uint64_t> highest_empty;
-+ if (new_lowest->first > cur_lowest->first && new_lowest != es.begin()) {
-+ --new_lowest;
-+ highest_empty = new_lowest->first;
-+ }
-+
-+ entries_t new_entries;
-+
-+ if ((es.end() - 1)->first < (entries_.end() - 1)->first) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": INCONSISTENCY! Head moved wrong way." << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+
-+ if ((es.end() - 1)->first > (entries_.end() - 1)->first) {
-+ auto ei = es.lower_bound((entries_.end() - 1)->first + 1);
-+ std::copy(ei, es.end(), std::inserter(new_entries, new_entries.end()));
-+ }
-+
-+ // Everything checks out!
-+
-+ version = v;
-+ entries_ = es;
-+ l.unlock();
-+
-+ if (highest_empty) {
-+ auto ec = handle_empty_to(*highest_empty);
-+ if (ec) return ec;
-+ }
-+
-+ if (!new_entries.empty()) {
-+ auto ec = handle_new_gens(std::move(new_entries));
-+ if (ec) return ec;
-+ }
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+ return {};
-+}
-+
-+auto logback_generations::read(optional_yield y) noexcept ->
-+ tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
-+{
-+ try {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ librados::ObjectReadOperation op;
-+ std::unique_lock l(m);
-+ cls_version_check(op, version, VER_COND_GE);
-+ l.unlock();
-+ obj_version v2;
-+ cls_version_read(op, &v2);
-+ cb::list bl;
-+ op.read(0, 0, &bl, nullptr);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
-+ if (r < 0) {
-+ if (r == -ENOENT) {
-+ ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": oid=" << oid
-+ << " not found" << dendl;
-+ } else {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed reading oid=" << oid
-+ << ", r=" << r << dendl;
-+ }
-+ return tl::unexpected(bs::error_code(-r, bs::system_category()));
-+ }
-+ auto bi = bl.cbegin();
-+ entries_t e;
-+ try {
-+ decode(e, bi);
-+ } catch (const cb::error& err) {
-+ return tl::unexpected(err.code());
-+ }
-+ return std::pair{ std::move(e), std::move(v2) };
-+ } catch (const std::bad_alloc&) {
-+ return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
-+ }
-+}
-+
-+bs::error_code logback_generations::write(entries_t&& e,
-+ std::unique_lock<std::mutex>&& l_,
-+ optional_yield y) noexcept
-+{
-+ auto l = std::move(l_);
-+ ceph_assert(l.mutex() == &m &&
-+ l.owns_lock());
-+ try {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ librados::ObjectWriteOperation op;
-+ cls_version_check(op, version, VER_COND_GE);
-+ cb::list bl;
-+ encode(e, bl);
-+ op.write_full(bl);
-+ cls_version_inc(op);
-+ auto r = rgw_rados_operate(ioctx, oid, &op, y);
-+ if (r == 0) {
-+ entries_ = std::move(e);
-+ version.inc();
-+ return {};
-+ }
-+ l.unlock();
-+ if (r < 0 && r != -ECANCELED) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed reading oid=" << oid
-+ << ", r=" << r << dendl;
-+ return { -r, bs::system_category() };
-+ }
-+ if (r == -ECANCELED) {
-+ auto ec = update(y);
-+ if (ec) {
-+ return ec;
-+ } else {
-+ return { ECANCELED, bs::system_category() };
-+ }
-+ }
-+ } catch (const std::bad_alloc&) {
-+ return { ENOMEM, bs::system_category() };
-+ }
-+ return {};
-+}
-+
-+
-+bs::error_code logback_generations::watch() noexcept {
-+ try {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ auto r = ioctx.watch2(oid, &watchcookie, this);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed to set watch oid=" << oid
-+ << ", r=" << r << dendl;
-+ return { -r, bs::system_category() };
-+ }
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+ return {};
-+}
-+
-+bs::error_code logback_generations::new_backing(log_type type,
-+ optional_yield y) noexcept {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ static constexpr auto max_tries = 10;
-+ try {
-+ auto ec = update(y);
-+ if (ec) return ec;
-+ auto tries = 0;
-+ entries_t new_entries;
-+ do {
-+ std::unique_lock l(m);
-+ auto last = entries_.end() - 1;
-+ if (last->second.type == type) {
-+ // Nothing to be done
-+ return {};
-+ }
-+ auto newgenid = last->first + 1;
-+ logback_generation newgen;
-+ newgen.gen_id = newgenid;
-+ newgen.type = type;
-+ new_entries.emplace(newgenid, newgen);
-+ auto es = entries_;
-+ es.emplace(newgenid, std::move(newgen));
-+ ec = write(std::move(es), std::move(l), y);
-+ ++tries;
-+ } while (ec == bs::errc::operation_canceled &&
-+ tries < max_tries);
-+ if (tries >= max_tries) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": exhausted retry attempts." << dendl;
-+ return ec;
-+ }
-+
-+ if (ec) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": write failed with ec=" << ec.message() << dendl;
-+ return ec;
-+ }
-+
-+ cb::list bl, rbl;
-+
-+ auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": notify failed with r=" << r << dendl;
-+ return { -r, bs::system_category() };
-+ }
-+ ec = handle_new_gens(new_entries);
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+ return {};
-+}
-+
-+bs::error_code logback_generations::empty_to(uint64_t gen_id,
-+ optional_yield y) noexcept {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ static constexpr auto max_tries = 10;
-+ try {
-+ auto ec = update(y);
-+ if (ec) return ec;
-+ auto tries = 0;
-+ uint64_t newtail = 0;
-+ do {
-+ std::unique_lock l(m);
-+ {
-+ auto last = entries_.end() - 1;
-+ if (gen_id >= last->first) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": Attempt to trim beyond the possible." << dendl;
-+ return bs::error_code(EINVAL, bs::system_category());
-+ }
-+ }
-+ auto es = entries_;
-+ auto ei = es.upper_bound(gen_id);
-+ if (ei == es.begin()) {
-+ // Nothing to be done.
-+ return {};
-+ }
-+ for (auto i = es.begin(); i < ei; ++i) {
-+ newtail = i->first;
-+ i->second.empty = true;
-+ }
-+ ec = write(std::move(es), std::move(l), y);
-+ ++tries;
-+ } while (ec == bs::errc::operation_canceled &&
-+ tries < max_tries);
-+ if (tries >= max_tries) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": exhausted retry attempts." << dendl;
-+ return ec;
-+ }
-+
-+ if (ec) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": write failed with ec=" << ec.message() << dendl;
-+ return ec;
-+ }
-+
-+ cb::list bl, rbl;
-+
-+ auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": notify failed with r=" << r << dendl;
-+ return { -r, bs::system_category() };
-+ }
-+ ec = handle_empty_to(newtail);
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+ return {};
-+}
-+
-+bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ static constexpr auto max_tries = 10;
-+ try {
-+ auto ec = update(y);
-+ if (ec) return ec;
-+ auto tries = 0;
-+ entries_t new_entries;
-+ std::unique_lock l(m);
-+ ceph_assert(!entries_.empty());
-+ auto i = lowest_nomempty(entries_);
-+ if (i == entries_.begin()) {
-+ return {};
-+ }
-+ auto ln = i->first;
-+ entries_t es;
-+ std::copy(entries_.cbegin(), i,
-+ std::inserter(es, es.end()));
-+ l.unlock();
-+ do {
-+ for (const auto& [gen_id, e] : es) {
-+ ceph_assert(e.empty);
-+ auto ec = log_remove(ioctx, shards,
-+ [this, gen_id](int shard) {
-+ return this->get_oid(gen_id, shard);
-+ }, y);
-+ if (ec) {
-+ return ec;
-+ }
-+ }
-+ l.lock();
-+ i = entries_.find(ln);
-+ es.clear();
-+ std::copy(i, entries_.cend(), std::inserter(es, es.end()));
-+ ec = write(std::move(es), std::move(l), y);
-+ ++tries;
-+ } while (ec == bs::errc::operation_canceled &&
-+ tries < max_tries);
-+ if (tries >= max_tries) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": exhausted retry attempts." << dendl;
-+ return ec;
-+ }
-+
-+ if (ec) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": write failed with ec=" << ec.message() << dendl;
-+ return ec;
-+ }
-+ } catch (const std::bad_alloc&) {
-+ return bs::error_code(ENOMEM, bs::system_category());
-+ }
-+ return {};
-+}
-+
-+void logback_generations::handle_notify(uint64_t notify_id,
-+ uint64_t cookie,
-+ uint64_t notifier_id,
-+ bufferlist& bl)
-+{
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ if (notifier_id != my_id) {
-+ auto ec = update(null_yield);
-+ if (ec) {
-+ lderr(cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": update failed, no one to report to and no safe way to continue."
-+ << dendl;
-+ abort();
-+ }
-+ }
-+ cb::list rbl;
-+ ioctx.notify_ack(oid, notify_id, watchcookie, rbl);
-+}
-+
-+void logback_generations::handle_error(uint64_t cookie, int err) {
-+ auto cct = static_cast<CephContext*>(ioctx.cct());
-+ auto r = ioctx.unwatch2(watchcookie);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed to set unwatch oid=" << oid
-+ << ", r=" << r << dendl;
-+ }
-+
-+ auto ec = watch();
-+ if (ec) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": failed to re-establish watch, unsafe to continue: oid="
-+ << oid << ", ec=" << ec.message() << dendl;
-+ }
-+}
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index 8546370a3757a..242bf0e1c00a4 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -11,6 +11,7 @@
-
- #include <strings.h>
-
-+#include <boost/container/flat_map.hpp>
- #include <boost/system/error_code.hpp>
-
- #undef FMT_HEADER_ONLY
-@@ -22,10 +23,13 @@
- #include "include/expected.hpp"
- #include "include/function2.hpp"
-
-+#include "cls/version/cls_version_types.h"
-+
- #include "common/async/yield_context.h"
- #include "common/Formatter.h"
- #include "common/strtol.h"
-
-+namespace bc = boost::container;
- namespace bs = boost::system;
-
- /// Type of log backing, stored in the mark used in the quick check,
-@@ -43,7 +47,7 @@ inline void encode(const log_type& type, ceph::buffer::list& bl) {
- inline void decode(log_type& type, bufferlist::const_iterator& bl) {
- uint8_t t;
- decode(t, bl);
-- type = static_cast<log_type>(type);
-+ type = static_cast<log_type>(t);
- }
-
- inline std::optional<log_type> to_log_type(std::string_view s) {
-@@ -108,6 +112,117 @@ struct logback_generation {
- };
- WRITE_CLASS_ENCODER(logback_generation)
-
-+class logback_generations : public librados::WatchCtx2 {
-+public:
-+ using entries_t = bc::flat_map<uint64_t, logback_generation>;
-+
-+protected:
-+ librados::IoCtx& ioctx;
-+ logback_generations(librados::IoCtx& ioctx,
-+ std::string oid,
-+ fu2::unique_function<std::string(
-+ uint64_t, int) const>&& get_oid,
-+ int shards) noexcept
-+ : ioctx(ioctx), oid(oid), get_oid(std::move(get_oid)),
-+ shards(shards) {}
-+
-+ uint64_t my_id = ioctx.get_instance_id();
-+
-+private:
-+ const std::string oid;
-+ const fu2::unique_function<std::string(uint64_t, int) const> get_oid;
-+
-+protected:
-+ const int shards;
-+
-+ uint64_t watchcookie = 0;
-+
-+ obj_version version;
-+ std::mutex m;
-+ entries_t entries_;
-+
-+ tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
-+ read(optional_yield y) noexcept;
-+ bs::error_code write(entries_t&& e, std::unique_lock<std::mutex>&& l_,
-+ optional_yield y) noexcept;
-+ bs::error_code setup(log_type def, optional_yield y) noexcept;
-+
-+ bs::error_code watch() noexcept;
-+
-+ auto lowest_nomempty(const entries_t& es) {
-+ return std::find_if(es.begin(), es.end(),
-+ [](const auto& e) {
-+ return !e.second.empty;
-+ });
-+ }
-+
-+public:
-+
-+ /// For the use of watch/notify.
-+
-+ void handle_notify(uint64_t notify_id,
-+ uint64_t cookie,
-+ uint64_t notifier_id,
-+ bufferlist& bl) override final;
-+
-+ void handle_error(uint64_t cookie, int err) override final;
-+
-+ /// Public interface
-+
-+ virtual ~logback_generations();
-+
-+ template<typename T, typename... Args>
-+ static tl::expected<std::unique_ptr<T>, bs::error_code>
-+ init(librados::IoCtx& ioctx_, std::string oid_,
-+ fu2::unique_function<std::string(uint64_t, int) const>&& get_oid_,
-+ int shards_, log_type def, optional_yield y,
-+ Args&& ...args) noexcept {
-+ try {
-+ T* lgp = new T(ioctx_, std::move(oid_),
-+ std::move(get_oid_),
-+ shards_, std::forward<Args>(args)...);
-+ std::unique_ptr<T> lg(lgp);
-+ lgp = nullptr;
-+ auto ec = lg->setup(def, y);
-+ if (ec)
-+ return tl::unexpected(ec);
-+ // Obnoxiousness for C++ Compiler in Bionic Beaver
-+ return tl::expected<std::unique_ptr<T>, bs::error_code>(std::move(lg));
-+ } catch (const std::bad_alloc&) {
-+ return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
-+ }
-+ }
-+
-+ bs::error_code update(optional_yield y) noexcept;
-+
-+ entries_t entries() const {
-+ return entries_;
-+ }
-+
-+ bs::error_code new_backing(log_type type, optional_yield y) noexcept;
-+
-+ bs::error_code empty_to(uint64_t gen_id, optional_yield y) noexcept;
-+
-+ bs::error_code remove_empty(optional_yield y) noexcept;
-+
-+ // Callbacks, to be defined by descendant.
-+
-+ /// Handle initialization on startup
-+ ///
-+ /// @param e All non-empty generations
-+ virtual bs::error_code handle_init(entries_t e) noexcept = 0;
-+
-+ /// Handle new generations.
-+ ///
-+ /// @param e Map of generations added since last update
-+ virtual bs::error_code handle_new_gens(entries_t e) noexcept = 0;
-+
-+ /// Handle generations being marked empty
-+ ///
-+ /// @param new_tail Lowest non-empty generation
-+ virtual bs::error_code handle_empty_to(uint64_t new_tail) noexcept = 0;
-+};
-+
- inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
- return (gen_id > 0 ?
- fmt::format("G{:0>20}@{}", gen_id, cursor) :
-diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
-index 848bd6b50c4e5..166de2dd8242c 100644
---- a/src/test/rgw/test_log_backing.cc
-+++ b/src/test/rgw/test_log_backing.cc
-@@ -46,17 +46,23 @@ class LogBacking : public testing::Test {
- const std::string pool_name = get_temp_pool_name();
- lr::Rados rados;
- lr::IoCtx ioctx;
-+ lr::Rados rados2;
-+ lr::IoCtx ioctx2;
-
- void SetUp() override {
- ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
- ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
-+ connect_cluster_pp(rados2);
-+ ASSERT_EQ(0, rados2.ioctx_create(pool_name.c_str(), ioctx2));
- }
- void TearDown() override {
- destroy_one_pool_pp(pool_name, rados);
- }
-
-- static std::string get_oid(int i) {
-- return fmt::format("shard.{}", i);
-+ std::string get_oid(uint64_t gen_id, int i) const {
-+ return (gen_id > 0 ?
-+ fmt::format("shard@G{}.{}", gen_id, i) :
-+ fmt::format("shard.{}", i));
- }
-
- void make_omap() {
-@@ -66,7 +72,7 @@ class LogBacking : public testing::Test {
- cb::list bl;
- encode(i, bl);
- cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
-- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
-+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
- ASSERT_GE(r, 0);
- }
- }
-@@ -77,13 +83,13 @@ class LogBacking : public testing::Test {
- cb::list bl;
- encode(i, bl);
- cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
-- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
-+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
- ASSERT_GE(r, 0);
- }
-
- void empty_omap() {
- for (int i = 0; i < SHARDS; ++i) {
-- auto oid = get_oid(i);
-+ auto oid = get_oid(0, i);
- std::string to_marker;
- {
- lr::ObjectReadOperation op;
-@@ -116,7 +122,7 @@ class LogBacking : public testing::Test {
- {
- for (int i = 0; i < SHARDS; ++i) {
- std::unique_ptr<RCf::FIFO> fifo;
-- auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
-+ auto r = RCf::FIFO::create(ioctx, get_oid(0, i), &fifo, null_yield);
- ASSERT_EQ(0, r);
- ASSERT_TRUE(fifo);
- }
-@@ -126,7 +132,7 @@ class LogBacking : public testing::Test {
- {
- using ceph::encode;
- std::unique_ptr<RCf::FIFO> fifo;
-- auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
-+ auto r = RCf::FIFO::open(ioctx, get_oid(0, i), &fifo, null_yield);
- ASSERT_GE(0, r);
- ASSERT_TRUE(fifo);
- cb::list bl;
-@@ -149,14 +155,16 @@ TEST_F(LogBacking, TestOmap)
- {
- make_omap();
- auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-- get_oid, null_yield);
-+ [this](int shard){ return get_oid(0, shard); },
-+ null_yield);
- ASSERT_EQ(log_type::omap, *stat);
- }
-
- TEST_F(LogBacking, TestOmapEmpty)
- {
- auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
-- get_oid, null_yield);
-+ [this](int shard){ return get_oid(0, shard); },
-+ null_yield);
- ASSERT_EQ(log_type::omap, *stat);
- }
-
-@@ -164,14 +172,16 @@ TEST_F(LogBacking, TestFIFO)
- {
- make_fifo();
- auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-- get_oid, null_yield);
-+ [this](int shard){ return get_oid(0, shard); },
-+ null_yield);
- ASSERT_EQ(log_type::fifo, *stat);
- }
-
- TEST_F(LogBacking, TestFIFOEmpty)
- {
- auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
-- get_oid, null_yield);
-+ [this](int shard){ return get_oid(0, shard); },
-+ null_yield);
- ASSERT_EQ(log_type::fifo, *stat);
- }
-
-@@ -192,3 +202,176 @@ TEST(CursorGen, RoundTrip) {
- ASSERT_EQ(pcurs, cursor);
- }
- }
-+
-+class generations final : public logback_generations {
-+public:
-+
-+ entries_t got_entries;
-+ std::optional<uint64_t> tail;
-+
-+ using logback_generations::logback_generations;
-+
-+ bs::error_code handle_init(entries_t e) noexcept {
-+ got_entries = e;
-+ return {};
-+ }
-+
-+ bs::error_code handle_new_gens(entries_t e) noexcept {
-+ got_entries = e;
-+ return {};
-+ }
-+
-+ bs::error_code handle_empty_to(uint64_t new_tail) noexcept {
-+ tail = new_tail;
-+ return {};
-+ }
-+};
-+
-+TEST_F(LogBacking, GenerationSingle)
-+{
-+ auto lgr = logback_generations::init<generations>(
-+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+ ASSERT_TRUE(lgr);
-+
-+ auto lg = std::move(*lgr);
-+
-+ ASSERT_EQ(0, lg->got_entries.begin()->first);
-+
-+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-+ ASSERT_FALSE(lg->got_entries[0].empty);
-+
-+ auto ec = lg->empty_to(0, null_yield);
-+ ASSERT_TRUE(ec);
-+
-+
-+ lg.reset();
-+
-+ lg = *logback_generations::init<generations>(
-+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+
-+ ASSERT_EQ(0, lg->got_entries.begin()->first);
-+
-+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-+ ASSERT_FALSE(lg->got_entries[0].empty);
-+
-+ lg->got_entries.clear();
-+
-+ ec = lg->new_backing(log_type::omap, null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ ASSERT_EQ(1, lg->got_entries.size());
-+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-+ ASSERT_FALSE(lg->got_entries[1].empty);
-+
-+ lg.reset();
-+
-+ lg = *logback_generations::init<generations>(
-+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+
-+ ASSERT_EQ(2, lg->got_entries.size());
-+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-+ ASSERT_FALSE(lg->got_entries[0].empty);
-+
-+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-+ ASSERT_FALSE(lg->got_entries[1].empty);
-+
-+ ec = lg->empty_to(0, null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ ASSERT_EQ(0, *lg->tail);
-+
-+ lg.reset();
-+
-+ lg = *logback_generations::init<generations>(
-+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+
-+ ASSERT_EQ(1, lg->got_entries.size());
-+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-+ ASSERT_FALSE(lg->got_entries[1].empty);
-+
-+ ec = lg->remove_empty(null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ auto entries = lg->entries();
-+ ASSERT_EQ(1, entries.size());
-+
-+ ASSERT_EQ(1, entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, entries[1].type);
-+ ASSERT_FALSE(entries[1].empty);
-+
-+ lg.reset();
-+}
-+
-+TEST_F(LogBacking, GenerationWN)
-+{
-+ auto lg1 = *logback_generations::init<generations>(
-+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+
-+ auto ec = lg1->new_backing(log_type::omap, null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ ASSERT_EQ(1, lg1->got_entries.size());
-+ ASSERT_EQ(1, lg1->got_entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
-+ ASSERT_FALSE(lg1->got_entries[1].empty);
-+
-+ lg1->got_entries.clear();
-+
-+ auto lg2 = *logback_generations::init<generations>(
-+ ioctx2, "foobar", [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ }, SHARDS, log_type::fifo, null_yield);
-+
-+ ASSERT_EQ(2, lg2->got_entries.size());
-+
-+ ASSERT_EQ(0, lg2->got_entries[0].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
-+ ASSERT_FALSE(lg2->got_entries[0].empty);
-+
-+ ASSERT_EQ(1, lg2->got_entries[1].gen_id);
-+ ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
-+ ASSERT_FALSE(lg2->got_entries[1].empty);
-+
-+ lg2->got_entries.clear();
-+
-+ ec = lg1->new_backing(log_type::fifo, null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ ASSERT_EQ(1, lg1->got_entries.size());
-+ ASSERT_EQ(2, lg1->got_entries[2].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
-+ ASSERT_FALSE(lg1->got_entries[2].empty);
-+
-+ ASSERT_EQ(1, lg2->got_entries.size());
-+ ASSERT_EQ(2, lg2->got_entries[2].gen_id);
-+ ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
-+ ASSERT_FALSE(lg2->got_entries[2].empty);
-+
-+ lg1->got_entries.clear();
-+ lg2->got_entries.clear();
-+
-+ ec = lg2->empty_to(1, null_yield);
-+ ASSERT_FALSE(ec);
-+
-+ ASSERT_EQ(1, *lg1->tail);
-+ ASSERT_EQ(1, *lg2->tail);
-+
-+ lg1->tail.reset();
-+ lg2->tail.reset();
-+}
-
-From 739be4ff5322878d80a593f9364295c2ed1c1b86 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Mon, 23 Nov 2020 15:29:35 -0500
-Subject: [PATCH 10/26] rgw: Add rgw_complete_aio_completion()
-
-To manually complete an asynchronous librados call.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 97c3f2b4e6d0a8d0c2366d6dca4570e063af7953)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/cls_fifo_legacy.cc | 24 +++---------------------
- src/rgw/rgw_datalog.cc | 22 +---------------------
- src/rgw/rgw_tools.cc | 8 ++++++++
- src/rgw/rgw_tools.h | 5 +++++
- 4 files changed, 17 insertions(+), 42 deletions(-)
-
-diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
-index 569a3e77c458f..f95b796152d33 100644
---- a/src/rgw/cls_fifo_legacy.cc
-+++ b/src/rgw/cls_fifo_legacy.cc
-@@ -428,28 +428,10 @@ struct Completion {
- return c;
- }
- static void complete(Ptr&& p, int r) {
-- auto c = p->_super->pc;
-+ auto c = p->_super;
- p->_super = nullptr;
-- c->lock.lock();
-- c->rval = r;
-- c->complete = true;
-- c->lock.unlock();
--
-- auto cb_complete = c->callback_complete;
-- auto cb_complete_arg = c->callback_complete_arg;
-- if (cb_complete)
-- cb_complete(c, cb_complete_arg);
--
-- auto cb_safe = c->callback_safe;
-- auto cb_safe_arg = c->callback_safe_arg;
-- if (cb_safe)
-- cb_safe(c, cb_safe_arg);
--
-- c->lock.lock();
-- c->callback_complete = nullptr;
-- c->callback_safe = nullptr;
-- c->cond.notify_all();
-- c->put_unlock();
-+ c->pc->put();
-+ rgw_complete_aio_completion(c, r);
- }
-
- static void cb(lr::completion_t, void* arg) {
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 329657d463125..460ebd105dca8 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -333,27 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- librados::AioCompletion* c) override {
- int r = 0;
- if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
-- auto pc = c->pc;
-- pc->get();
-- pc->lock.lock();
-- pc->rval = 0;
-- pc->complete = true;
-- pc->lock.unlock();
-- auto cb_complete = pc->callback_complete;
-- auto cb_complete_arg = pc->callback_complete_arg;
-- if (cb_complete)
-- cb_complete(pc, cb_complete_arg);
--
-- auto cb_safe = pc->callback_safe;
-- auto cb_safe_arg = pc->callback_safe_arg;
-- if (cb_safe)
-- cb_safe(pc, cb_safe_arg);
--
-- pc->lock.lock();
-- pc->callback_complete = NULL;
-- pc->callback_safe = NULL;
-- pc->cond.notify_all();
-- pc->put_unlock();
-+ rgw_complete_aio_completion(c, 0);
- } else {
- fifos[index]->trim(marker, false, c);
- }
-diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
-index 89a322b0675ad..82e0ecf546d60 100644
---- a/src/rgw/rgw_tools.cc
-+++ b/src/rgw/rgw_tools.cc
-@@ -11,6 +11,8 @@
- #include "include/types.h"
- #include "include/stringify.h"
-
-+#include "librados/AioCompletionImpl.h"
-+
- #include "rgw_common.h"
- #include "rgw_tools.h"
- #include "rgw_acl_s3.h"
-@@ -592,3 +594,9 @@ void rgw_tools_cleanup()
- delete ext_mime_map;
- ext_mime_map = nullptr;
- }
-+
-+void rgw_complete_aio_completion(librados::AioCompletion* c, int r) {
-+ auto pc = c->pc;
-+ librados::CB_AioCompleteAndSafe cb(pc);
-+ cb(r);
-+}
-diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h
-index 28d251c28d6c6..cf586dabea9cf 100644
---- a/src/rgw/rgw_tools.h
-+++ b/src/rgw/rgw_tools.h
-@@ -253,4 +253,9 @@ class RGWDataAccess
-
- using RGWDataAccessRef = std::shared_ptr<RGWDataAccess>;
-
-+/// Complete an AioCompletion. To return error values or otherwise
-+/// satisfy the caller. Useful for making complicated asynchronous
-+/// calls and error handling.
-+void rgw_complete_aio_completion(librados::AioCompletion* c, int r);
-+
- #endif
-
-From af90c013b688d2a83773c5fe3ad82c262a1156cb Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Wed, 6 Jan 2021 03:40:50 -0500
-Subject: [PATCH 11/26] rgw: Lay groundwork for multigenerational datalog
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 27ca609755a2c0e8fd501be46bc20026aa33b93c)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/cls_fifo_legacy.cc | 65 ------------------
- src/rgw/cls_fifo_legacy.h | 65 ++++++++++++++++++
- src/rgw/rgw_datalog.cc | 135 ++++++++++++++++++++++++++++++++++---
- src/rgw/rgw_datalog.h | 36 ++++++++--
- src/rgw/rgw_log_backing.h | 9 +++
- 5 files changed, 232 insertions(+), 78 deletions(-)
-
-diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
-index f95b796152d33..3ddb2578d3541 100644
---- a/src/rgw/cls_fifo_legacy.cc
-+++ b/src/rgw/cls_fifo_legacy.cc
-@@ -32,10 +32,6 @@
- #include "cls/fifo/cls_fifo_types.h"
- #include "cls/fifo/cls_fifo_ops.h"
-
--#include "librados/AioCompletionImpl.h"
--
--#include "rgw_tools.h"
--
- #include "cls_fifo_legacy.h"
-
- namespace rgw::cls::fifo {
-@@ -382,67 +378,6 @@ struct partinfo_completion : public lr::ObjectOperationCompletion {
- }
- };
-
--template<typename T>
--struct Completion {
--private:
-- lr::AioCompletion* _cur = nullptr;
-- lr::AioCompletion* _super;
--public:
--
-- using Ptr = std::unique_ptr<T>;
--
-- lr::AioCompletion* cur() const {
-- return _cur;
-- }
-- lr::AioCompletion* super() const {
-- return _super;
-- }
--
-- Completion(lr::AioCompletion* super) : _super(super) {
-- super->pc->get();
-- }
--
-- ~Completion() {
-- if (_super) {
-- _super->pc->put();
-- }
-- if (_cur)
-- _cur->release();
-- _super = nullptr;
-- _cur = nullptr;
-- }
--
-- // The only times that aio_operate can return an error are:
-- // 1. The completion contains a null pointer. This should just
-- // crash, and in our case it does.
-- // 2. An attempt is made to write to a snapshot. RGW doesn't use
-- // snapshots, so we don't care.
-- //
-- // So we will just assert that initiating an Aio operation succeeds
-- // and not worry about recovering.
-- static lr::AioCompletion* call(Ptr&& p) {
-- p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
-- &cb);
-- auto c = p->_cur;
-- p.release();
-- return c;
-- }
-- static void complete(Ptr&& p, int r) {
-- auto c = p->_super;
-- p->_super = nullptr;
-- c->pc->put();
-- rgw_complete_aio_completion(c, r);
-- }
--
-- static void cb(lr::completion_t, void* arg) {
-- auto t = static_cast<T*>(arg);
-- auto r = t->_cur->get_return_value();
-- t->_cur->release();
-- t->_cur = nullptr;
-- t->handle(Ptr(t), r);
-- }
--};
--
- lr::ObjectReadOperation get_part_info(CephContext* cct,
- fifo::part_header* header,
- std::uint64_t tid, int* r = 0)
-diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
-index b6b5f04bb30ad..307abbb198918 100644
---- a/src/rgw/cls_fifo_legacy.h
-+++ b/src/rgw/cls_fifo_legacy.h
-@@ -38,6 +38,10 @@
- #include "cls/fifo/cls_fifo_types.h"
- #include "cls/fifo/cls_fifo_ops.h"
-
-+#include "librados/AioCompletionImpl.h"
-+
-+#include "rgw_tools.h"
-+
- namespace rgw::cls::fifo {
- namespace cb = ceph::buffer;
- namespace fifo = rados::cls::fifo;
-@@ -265,6 +269,67 @@ class FIFO {
- lr::AioCompletion* c //< AIO Completion
- );
- };
-+
-+template<typename T>
-+struct Completion {
-+private:
-+ lr::AioCompletion* _cur = nullptr;
-+ lr::AioCompletion* _super;
-+public:
-+
-+ using Ptr = std::unique_ptr<T>;
-+
-+ lr::AioCompletion* cur() const {
-+ return _cur;
-+ }
-+ lr::AioCompletion* super() const {
-+ return _super;
-+ }
-+
-+ Completion(lr::AioCompletion* super) : _super(super) {
-+ super->pc->get();
-+ }
-+
-+ ~Completion() {
-+ if (_super) {
-+ _super->pc->put();
-+ }
-+ if (_cur)
-+ _cur->release();
-+ _super = nullptr;
-+ _cur = nullptr;
-+ }
-+
-+ // The only times that aio_operate can return an error are:
-+ // 1. The completion contains a null pointer. This should just
-+ // crash, and in our case it does.
-+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
-+ // snapshots, so we don't care.
-+ //
-+ // So we will just assert that initiating an Aio operation succeeds
-+ // and not worry about recovering.
-+ static lr::AioCompletion* call(Ptr&& p) {
-+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
-+ &cb);
-+ auto c = p->_cur;
-+ p.release();
-+ return c;
-+ }
-+ static void complete(Ptr&& p, int r) {
-+ auto c = p->_super;
-+ p->_super = nullptr;
-+ rgw_complete_aio_completion(c, r);
-+ }
-+
-+ static void cb(lr::completion_t, void* arg) {
-+ auto t = static_cast<T*>(arg);
-+ auto r = t->_cur->get_return_value();
-+ t->_cur->release();
-+ t->_cur = nullptr;
-+ t->handle(Ptr(t), r);
-+ }
-+};
-+
- }
-
- #endif // CEPH_RGW_CLS_FIFO_LEGACY_H
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 460ebd105dca8..2b04d530d1c6f 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -383,10 +383,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- try {
- switch (*found) {
- case log_type::omap:
-- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
-+ bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
- break;
- case log_type::fifo:
-- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
-+ bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
- break;
- }
- } catch (bs::system_error& e) {
-@@ -396,7 +396,6 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- return ceph::from_error_code(e.code());
- }
-
-- ceph_assert(be);
- renew_thread = make_named_thread("rgw_dt_lg_renew",
- &RGWDataChangesLog::renew_run, this);
- return 0;
-@@ -426,6 +425,7 @@ int RGWDataChangesLog::renew_entries()
- l.unlock();
-
- auto ut = real_clock::now();
-+ auto be = bes.head();
- for (const auto& bs : entries) {
- auto index = choose_oid(bs);
-
-@@ -592,6 +592,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
-
- ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
-
-+ auto be = bes.head();
- ret = be->push(index, now, change.key, std::move(bl));
-
- now = real_clock::now();
-@@ -615,14 +616,44 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
- return ret;
- }
-
-+int DataLogBackends::list(int shard, int max_entries,
-+ std::vector<rgw_data_change_log_entry>& entries,
-+ std::optional<std::string_view> marker,
-+ std::string* out_marker, bool* truncated)
-+{
-+ auto [gen_id, cursor] = cursorgeno(marker);
-+ std::string out_cursor;
-+ while (max_entries > 0) {
-+ std::vector<rgw_data_change_log_entry> gentries;
-+ std::unique_lock l(m);
-+ auto i = lower_bound(gen_id);
-+ if (i == end()) return 0;
-+ auto be = i->second;
-+ auto r = be->list(shard, max_entries, gentries, cursor,
-+ &out_cursor, truncated);
-+ if (r < 0)
-+ return r;
-+
-+ *out_marker = gencursor(gen_id, out_cursor);
-+ for (auto& g : gentries) {
-+ g.log_id = gencursor(gen_id, g.log_id);
-+ }
-+ max_entries -= gentries.size();
-+ std::move(gentries.begin(), gentries.end(),
-+ std::back_inserter(entries));
-+ cursor = {};
-+ ++gen_id;
-+ }
-+ return 0;
-+}
-+
- int RGWDataChangesLog::list_entries(int shard, int max_entries,
- std::vector<rgw_data_change_log_entry>& entries,
- std::optional<std::string_view> marker,
- std::string* out_marker, bool* truncated)
- {
- assert(shard < num_shards);
-- return be->list(shard, max_entries, entries, std::string(marker.value_or("")),
-- out_marker, truncated);
-+ return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
- }
-
- int RGWDataChangesLog::list_entries(int max_entries,
-@@ -653,20 +684,105 @@ int RGWDataChangesLog::list_entries(int max_entries,
- int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
- {
- assert(shard_id < num_shards);
-+ auto be = bes.head();
- return be->get_info(shard_id, info);
- }
-
-+int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
-+{
-+ auto [target_gen, cursor] = cursorgen(marker);
-+ std::unique_lock l(m);
-+ const auto head_gen = (end() - 1)->second->gen_id;
-+ const auto tail_gen = begin()->first;
-+ if (target_gen < tail_gen) return 0;
-+ auto r = 0;
-+ for (auto i = lower_bound(0);
-+ i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
-+ i = upper_bound(i->first)) {
-+ auto be = i->second;
-+ l.unlock();
-+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
-+ r = be->trim(shard_id, c);
-+ l.lock();
-+ };
-+ return r;
-+}
-+
- int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
- {
- assert(shard_id < num_shards);
-- return be->trim(shard_id, marker);
-+ return bes.trim_entries(shard_id, marker);
-+}
-+
-+class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
-+public:
-+ DataLogBackends* const bes;
-+ const int shard_id;
-+ const uint64_t target_gen;
-+ const std::string cursor;
-+ const uint64_t head_gen;
-+ const uint64_t tail_gen;
-+ boost::intrusive_ptr<RGWDataChangesBE> be;
-+
-+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
-+ uint64_t head_gen, uint64_t tail_gen,
-+ boost::intrusive_ptr<RGWDataChangesBE>&& be,
-+ lr::AioCompletion* super)
-+ : Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
-+ cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
-+ be(std::move(be)) {}
-+
-+ void handle(Ptr&& p, int r) {
-+ auto gen_id = be->gen_id;
-+ be.reset();
-+ if (r < 0) {
-+ complete(std::move(p), r);
-+ return;
-+ }
-+
-+ {
-+ std::unique_lock l(bes->m);
-+ auto i = bes->upper_bound(gen_id);
-+ if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
-+ l.unlock();
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ be = i->second;
-+ }
-+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
-+ r = be->trim(shard_id, c, call(std::move(p)));
-+ }
-+};
-+
-+void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
-+ librados::AioCompletion* c)
-+{
-+ auto [target_gen, cursor] = cursorgen(marker);
-+ std::unique_lock l(m);
-+ const auto head_gen = (end() - 1)->second->gen_id;
-+ const auto tail_gen = begin()->first;
-+ if (target_gen < tail_gen) {
-+ l.unlock();
-+ rgw_complete_aio_completion(c, 0);
-+ return;
-+ }
-+ auto be = lower_bound(0)->second;
-+ l.unlock();
-+ auto p = be.get();
-+ auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
-+ std::string(cursor), head_gen, tail_gen,
-+ std::move(be), c);
-+
-+ p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
- }
-
- int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
- librados::AioCompletion* c)
- {
- assert(shard_id < num_shards);
-- return be->trim(shard_id, marker, c);
-+ bes.trim_entries(shard_id, marker, c);
-+ return 0;
- }
-
- bool RGWDataChangesLog::going_down() const
-@@ -720,6 +836,7 @@ void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs)
- modified_shards[shard_id].insert(key);
- }
-
--std::string_view RGWDataChangesLog::max_marker() const {
-- return be->max_marker();
-+std::string RGWDataChangesLog::max_marker() const {
-+ return gencursor(std::numeric_limits<uint64_t>::max(),
-+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
- }
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index 2a73237b38d2d..0915bebde11cf 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -13,6 +13,8 @@
- #include <vector>
-
- #include <boost/container/flat_map.hpp>
-+#include <boost/smart_ptr/intrusive_ptr.hpp>
-+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
-
- #undef FMT_HEADER_ONLY
- #define FMT_HEADER_ONLY 1
-@@ -119,12 +121,37 @@ class RGWDataChangesLog;
-
- class RGWDataChangesBE;
-
-+class DataLogBackends
-+ : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
-+ friend class GenTrim;
-+
-+ std::mutex m;
-+public:
-+
-+ boost::intrusive_ptr<RGWDataChangesBE> head() {
-+ std::unique_lock l(m);
-+ auto i = end();
-+ --i;
-+ return i->second;
-+ }
-+ int list(int shard, int max_entries,
-+ std::vector<rgw_data_change_log_entry>& entries,
-+ std::optional<std::string_view> marker,
-+ std::string* out_marker, bool* truncated);
-+ int trim_entries(int shard_id, std::string_view marker);
-+ void trim_entries(int shard_id, std::string_view marker,
-+ librados::AioCompletion* c);
-+ void set_zero(RGWDataChangesBE* be) {
-+ emplace(0, be);
-+ }
-+};
-+
- class RGWDataChangesLog {
- CephContext *cct;
- librados::IoCtx ioctx;
- rgw::BucketChangeObserver *observer = nullptr;
- const RGWZone* zone;
-- std::unique_ptr<RGWDataChangesBE> be;
-+ DataLogBackends bes;
-
- const int num_shards;
- std::string get_prefix() {
-@@ -213,16 +240,15 @@ class RGWDataChangesLog {
- bucket_filter = std::move(f);
- }
- // a marker that compares greater than any other
-- std::string_view max_marker() const;
-+ std::string max_marker() const;
- std::string get_oid(uint64_t gen_id, int shard_id) const;
- };
-
--class RGWDataChangesBE {
-+class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
- protected:
- librados::IoCtx& ioctx;
- CephContext* const cct;
- RGWDataChangesLog& datalog;
-- uint64_t gen_id;
-
- std::string get_oid(int shard_id) {
- return datalog.get_oid(gen_id, shard_id);
-@@ -231,6 +257,8 @@ class RGWDataChangesBE {
- using entries = std::variant<std::list<cls_log_entry>,
- std::vector<ceph::buffer::list>>;
-
-+ const uint64_t gen_id;
-+
- RGWDataChangesBE(librados::IoCtx& ioctx,
- RGWDataChangesLog& datalog,
- uint64_t gen_id)
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index 242bf0e1c00a4..55a3139d11e2b 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -244,4 +244,13 @@ cursorgen(std::string_view cursor_) {
- return { *gen_id, cursor };
- }
-
-+inline std::pair<uint64_t, std::string_view>
-+cursorgeno(std::optional<std::string_view> cursor) {
-+ if (cursor) {
-+ return cursorgen(*cursor);
-+ } else {
-+ return { 0, ""s };
-+ }
-+}
-+
- #endif
-
-From 2f94c171859dd938ba02e57a243558b3bb4b219c Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 26 Jan 2021 01:27:24 -0500
-Subject: [PATCH 12/26] rgw: Clamp FIFO trim to head
-
-Don't try to trim a bunch of parts that don't exist.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 60b729e32602b7401e15957cef976386281c4ccb)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/cls_fifo_legacy.cc | 72 ++++++++++++++++++++++++++--
- src/test/rgw/test_cls_fifo_legacy.cc | 51 ++++++++++++++++++++
- 2 files changed, 120 insertions(+), 3 deletions(-)
-
-diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
-index 3ddb2578d3541..45a3ad505146a 100644
---- a/src/rgw/cls_fifo_legacy.cc
-+++ b/src/rgw/cls_fifo_legacy.cc
-@@ -1701,6 +1701,7 @@ int FIFO::list(int max_entries,
-
- int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
- {
-+ bool overshoot = false;
- auto marker = to_marker(markstr);
- if (!marker) {
- return -EINVAL;
-@@ -1709,6 +1710,25 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
- auto ofs = marker->ofs;
- std::unique_lock l(m);
- auto tid = ++next_tid;
-+ auto hn = info.head_part_num;
-+ const auto max_part_size = info.params.max_part_size;
-+ if (part_num > hn) {
-+ l.unlock();
-+ auto r = read_meta(tid, y);
-+ if (r < 0) {
-+ return r;
-+ }
-+ l.lock();
-+ auto hn = info.head_part_num;
-+ if (part_num > hn) {
-+ overshoot = true;
-+ part_num = hn;
-+ ofs = max_part_size;
-+ }
-+ }
-+ if (part_num < info.tail_part_num) {
-+ return -ENODATA;
-+ }
- auto pn = info.tail_part_num;
- l.unlock();
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-@@ -1719,7 +1739,6 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " pn=" << pn << " tid=" << tid << dendl;
- std::unique_lock l(m);
-- auto max_part_size = info.params.max_part_size;
- l.unlock();
- r = trim_part(pn, max_part_size, std::nullopt, false, tid, y);
- if (r < 0 && r == -ENOENT) {
-@@ -1771,7 +1790,7 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
- << " canceled too many times, giving up: tid=" << tid << dendl;
- return -EIO;
- }
-- return 0;
-+ return overshoot ? -ENODATA : 0;
- }
-
- struct Trimmer : public Completion<Trimmer> {
-@@ -1782,7 +1801,9 @@ struct Trimmer : public Completion<Trimmer> {
- bool exclusive;
- std::uint64_t tid;
- bool update = false;
-+ bool reread = false;
- bool canceled = false;
-+ bool overshoot = false;
- int retries = 0;
-
- Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
-@@ -1794,6 +1815,45 @@ struct Trimmer : public Completion<Trimmer> {
- auto cct = fifo->cct;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
-+
-+ if (reread) {
-+ reread = false;
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " read_meta failed: r="
-+ << r << " tid=" << tid << dendl;
-+ complete(std::move(p), r);
-+ return;
-+ }
-+ std::unique_lock l(fifo->m);
-+ auto hn = fifo->info.head_part_num;
-+ const auto max_part_size = fifo->info.params.max_part_size;
-+ const auto tail_part_num = fifo->info.tail_part_num;
-+ l.unlock();
-+ if (part_num > hn) {
-+ part_num = hn;
-+ ofs = max_part_size;
-+ overshoot = true;
-+ }
-+ if (part_num < tail_part_num) {
-+ complete(std::move(p), -ENODATA);
-+ return;
-+ }
-+ pn = tail_part_num;
-+ if (pn < part_num) {
-+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " pn=" << pn << " tid=" << tid << dendl;
-+ fifo->trim_part(pn++, max_part_size, std::nullopt,
-+ false, tid, call(std::move(p)));
-+ } else {
-+ update = true;
-+ canceled = tail_part_num < part_num;
-+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
-+ call(std::move(p)));
-+ }
-+ return;
-+ }
-+
- if (r == -ENOENT) {
- r = 0;
- }
-@@ -1850,7 +1910,7 @@ struct Trimmer : public Completion<Trimmer> {
- .tail_part_num(part_num), objv, &canceled,
- tid, call(std::move(p)));
- } else {
-- complete(std::move(p), 0);
-+ complete(std::move(p), overshoot ? -ENODATA : 0);
- }
- }
- };
-@@ -1860,6 +1920,7 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
- auto marker = to_marker(markstr);
- auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
- std::unique_lock l(m);
-+ const auto hn = info.head_part_num;
- const auto max_part_size = info.params.max_part_size;
- const auto pn = info.tail_part_num;
- const auto part_oid = info.part_oid(pn);
-@@ -1875,6 +1936,11 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
- }
- ++trimmer->pn;
- auto ofs = marker->ofs;
-+ if (marker->num > hn) {
-+ trimmer->reread = true;
-+ read_meta(tid, Trimmer::call(std::move(trimmer)));
-+ return;
-+ }
- if (pn < marker->num) {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " pn=" << pn << " tid=" << tid << dendl;
-diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
-index 69cee5a887405..26d9e9a9253e4 100644
---- a/src/test/rgw/test_cls_fifo_legacy.cc
-+++ b/src/test/rgw/test_cls_fifo_legacy.cc
-@@ -1125,3 +1125,54 @@ TEST_F(AioLegacyFIFO, TestPushBatch)
- auto& info = f->meta();
- ASSERT_EQ(info.head_part_num, 4);
- }
-+
-+TEST_F(LegacyFIFO, TrimAll)
-+{
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
-+ ASSERT_EQ(0, r);
-+ static constexpr auto max_entries = 10u;
-+ for (uint32_t i = 0; i < max_entries; ++i) {
-+ cb::list bl;
-+ encode(i, bl);
-+ r = f->push(bl, null_yield);
-+ ASSERT_EQ(0, r);
-+ }
-+
-+ /* trim one entry */
-+ r = f->trim(RCf::marker::max().to_string(), false, null_yield);
-+ ASSERT_EQ(-ENODATA, r);
-+
-+ std::vector<RCf::list_entry> result;
-+ bool more;
-+ r = f->list(1, std::nullopt, &result, &more, null_yield);
-+ ASSERT_EQ(0, r);
-+ ASSERT_TRUE(result.empty());
-+}
-+
-+TEST_F(LegacyFIFO, AioTrimAll)
-+{
-+ std::unique_ptr<RCf::FIFO> f;
-+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
-+ ASSERT_EQ(0, r);
-+ static constexpr auto max_entries = 10u;
-+ for (uint32_t i = 0; i < max_entries; ++i) {
-+ cb::list bl;
-+ encode(i, bl);
-+ r = f->push(bl, null_yield);
-+ ASSERT_EQ(0, r);
-+ }
-+
-+ auto c = R::Rados::aio_create_completion();
-+ f->trim(RCf::marker::max().to_string(), false, c);
-+ c->wait_for_complete();
-+ r = c->get_return_value();
-+ c->release();
-+ ASSERT_EQ(-ENODATA, r);
-+
-+ std::vector<RCf::list_entry> result;
-+ bool more;
-+ r = f->list(1, std::nullopt, &result, &more, null_yield);
-+ ASSERT_EQ(0, r);
-+ ASSERT_TRUE(result.empty());
-+}
-
-From d91df95e800f86d95ece8a0d3c84a260a009a1b9 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 26 Jan 2021 20:07:45 -0500
-Subject: [PATCH 13/26] rgw: Actually pull logbacking_generations into datalog
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit eb0f8ffcc785146a1fb249f4531620787be216ba)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 131 +++++++++++++++++++++++++++-----------
- src/rgw/rgw_datalog.h | 26 +++++++-
- src/rgw/rgw_log_backing.h | 2 +
- 3 files changed, 119 insertions(+), 40 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 2b04d530d1c6f..c64b22d518a9f 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -178,8 +178,8 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- lr::ObjectWriteOperation op;
- cls_log_trim(op, {}, {}, {}, std::string(marker));
- auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
-- if (r == -ENOENT) r = 0;
-- if (r < 0) {
-+ if (r == -ENOENT) r = -ENODATA;
-+ if (r < 0 && r != -ENODATA) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to get info from " << oids[index]
- << cpp_strerror(-r) << dendl;
-@@ -191,7 +191,7 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- lr::ObjectWriteOperation op;
- cls_log_trim(op, {}, {}, {}, std::string(marker));
- auto r = ioctx.aio_operate(oids[index], c, &op, 0);
-- if (r == -ENOENT) r = 0;
-+ if (r == -ENOENT) r = -ENODATA;
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to get info from " << oids[index]
-@@ -333,7 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- librados::AioCompletion* c) override {
- int r = 0;
- if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
-- rgw_complete_aio_completion(c, 0);
-+ rgw_complete_aio_completion(c, -ENODATA);
- } else {
- fifos[index]->trim(marker, false, c);
- }
-@@ -352,6 +352,65 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
- prefix(get_prefix()),
- changes(cct->_conf->rgw_data_log_changes_size) {}
-
-+bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
-+ std::unique_lock l(m);
-+
-+ for (const auto& [gen_id, gen] : e) {
-+ if (gen.empty) {
-+ lderr(datalog.cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
-+ }
-+ if (count(gen_id) != 0) {
-+ lderr(datalog.cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": ERROR: generation already exists: gen_id=" << gen_id << dendl;
-+ }
-+ try {
-+ switch (gen.type) {
-+ case log_type::omap:
-+ emplace(gen_id, new RGWDataChangesOmap(ioctx, datalog, gen_id, shards));
-+ break;
-+ case log_type::fifo:
-+ emplace(gen_id, new RGWDataChangesFIFO(ioctx, datalog, gen_id, shards));
-+ break;
-+ default:
-+ lderr(datalog.cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": IMPOSSIBLE: invalid log type: gen_id=" << gen_id
-+ << ", type" << gen.type << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+ } catch (const bs::system_error& err) {
-+ lderr(datalog.cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": error setting up backend: gen_id=" << gen_id
-+ << ", err=" << err.what() << dendl;
-+ return err.code();
-+ }
-+ }
-+ return {};
-+}
-+bs::error_code DataLogBackends::handle_new_gens(entries_t e) noexcept {
-+ return handle_init(std::move(e));
-+}
-+bs::error_code DataLogBackends::handle_empty_to(uint64_t new_tail) noexcept {
-+ std::unique_lock l(m);
-+ auto i = cbegin();
-+ if (i->first < new_tail) {
-+ return {};
-+ }
-+ if (new_tail >= (cend() - 1)->first) {
-+ lderr(datalog.cct)
-+ << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": ERROR: attempt to trim head: new_tail=" << new_tail << dendl;
-+ return bs::error_code(EFAULT, bs::system_category());
-+ }
-+ erase(i, upper_bound(new_tail));
-+ return {};
-+}
-+
-+
- int RGWDataChangesLog::start(const RGWZone* _zone,
- const RGWZoneParams& zoneparams,
- librados::Rados* lr)
-@@ -371,31 +430,21 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
- return -r;
- }
-
-- auto found = log_backing_type(ioctx, *defbacking, num_shards,
-- [this](int i) { return get_oid(0, i); },
-- null_yield);
-+ auto besr = logback_generations::init<DataLogBackends>(
-+ ioctx, metadata_log_oid(), [this](uint64_t gen_id, int shard) {
-+ return get_oid(gen_id, shard);
-+ },
-+ num_shards, *defbacking, null_yield, *this);
-
-- if (!found) {
-- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Error when checking log type: "
-- << found.error().message() << dendl;
-- }
-- try {
-- switch (*found) {
-- case log_type::omap:
-- bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
-- break;
-- case log_type::fifo:
-- bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
-- break;
-- }
-- } catch (bs::system_error& e) {
-+
-+ if (!besr) {
- lderr(cct) << __PRETTY_FUNCTION__
-- << ": Error when starting backend: "
-- << e.what() << dendl;
-- return ceph::from_error_code(e.code());
-+ << ": Error initializing backends: "
-+ << besr.error().message() << dendl;
-+ return ceph::from_error_code(besr.error());
- }
-
-+ bes = std::move(*besr);
- renew_thread = make_named_thread("rgw_dt_lg_renew",
- &RGWDataChangesLog::renew_run, this);
- return 0;
-@@ -425,7 +474,7 @@ int RGWDataChangesLog::renew_entries()
- l.unlock();
-
- auto ut = real_clock::now();
-- auto be = bes.head();
-+ auto be = bes->head();
- for (const auto& bs : entries) {
- auto index = choose_oid(bs);
-
-@@ -592,7 +641,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
-
- ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
-
-- auto be = bes.head();
-+ auto be = bes->head();
- ret = be->push(index, now, change.key, std::move(bl));
-
- now = real_clock::now();
-@@ -634,7 +683,9 @@ int DataLogBackends::list(int shard, int max_entries,
- if (r < 0)
- return r;
-
-- *out_marker = gencursor(gen_id, out_cursor);
-+ if (out_marker && !out_cursor.empty()) {
-+ *out_marker = gencursor(gen_id, out_cursor);
-+ }
- for (auto& g : gentries) {
- g.log_id = gencursor(gen_id, g.log_id);
- }
-@@ -653,7 +704,7 @@ int RGWDataChangesLog::list_entries(int shard, int max_entries,
- std::string* out_marker, bool* truncated)
- {
- assert(shard < num_shards);
-- return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
-+ return bes->list(shard, max_entries, entries, marker, out_marker, truncated);
- }
-
- int RGWDataChangesLog::list_entries(int max_entries,
-@@ -684,8 +735,12 @@ int RGWDataChangesLog::list_entries(int max_entries,
- int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
- {
- assert(shard_id < num_shards);
-- auto be = bes.head();
-- return be->get_info(shard_id, info);
-+ auto be = bes->head();
-+ auto r = be->get_info(shard_id, info);
-+ if (!info->marker.empty()) {
-+ info->marker = gencursor(be->gen_id, info->marker);
-+ }
-+ return r;
- }
-
- int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
-@@ -696,13 +751,13 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
- const auto tail_gen = begin()->first;
- if (target_gen < tail_gen) return 0;
- auto r = 0;
-- for (auto i = lower_bound(0);
-- i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
-- i = upper_bound(i->first)) {
-- auto be = i->second;
-+ for (auto be = lower_bound(0)->second;
-+ be->gen_id <= target_gen && be->gen_id <= head_gen && r >= 0;
-+ be = upper_bound(be->gen_id)->second) {
- l.unlock();
- auto c = be->gen_id == target_gen ? cursor : be->max_marker();
- r = be->trim(shard_id, c);
-+ if (r == -ENODATA && be->gen_id < target_gen) r = 0;
- l.lock();
- };
- return r;
-@@ -711,7 +766,7 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
- int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
- {
- assert(shard_id < num_shards);
-- return bes.trim_entries(shard_id, marker);
-+ return bes->trim_entries(shard_id, marker);
- }
-
- class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
-@@ -735,6 +790,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- void handle(Ptr&& p, int r) {
- auto gen_id = be->gen_id;
- be.reset();
-+ if (r == -ENOENT) r = -ENODATA;
-+ if (r == -ENODATA && gen_id < target_gen) r = 0;
- if (r < 0) {
- complete(std::move(p), r);
- return;
-@@ -781,7 +838,7 @@ int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
- librados::AioCompletion* c)
- {
- assert(shard_id < num_shards);
-- bes.trim_entries(shard_id, marker, c);
-+ bes->trim_entries(shard_id, marker, c);
- return 0;
- }
-
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index 0915bebde11cf..e9a768d546c00 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -36,6 +36,7 @@
- #include "cls/log/cls_log_types.h"
-
- #include "rgw_basic_types.h"
-+#include "rgw_log_backing.h"
- #include "rgw_sync_policy.h"
- #include "rgw_zone.h"
- #include "rgw_trim_bilog.h"
-@@ -121,11 +122,22 @@ class RGWDataChangesLog;
-
- class RGWDataChangesBE;
-
--class DataLogBackends
-- : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
-+class DataLogBackends final
-+ : public logback_generations,
-+ private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
-+ friend class logback_generations;
- friend class GenTrim;
-
- std::mutex m;
-+ RGWDataChangesLog& datalog;
-+
-+ DataLogBackends(librados::IoCtx& ioctx,
-+ std::string oid,
-+ fu2::unique_function<std::string(
-+ uint64_t, int) const>&& get_oid,
-+ int shards, RGWDataChangesLog& datalog) noexcept
-+ : logback_generations(ioctx, oid, std::move(get_oid),
-+ shards), datalog(datalog) {}
- public:
-
- boost::intrusive_ptr<RGWDataChangesBE> head() {
-@@ -144,20 +156,28 @@ class DataLogBackends
- void set_zero(RGWDataChangesBE* be) {
- emplace(0, be);
- }
-+
-+ bs::error_code handle_init(entries_t e) noexcept override;
-+ bs::error_code handle_new_gens(entries_t e) noexcept override;
-+ bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
- };
-
- class RGWDataChangesLog {
-+ friend DataLogBackends;
- CephContext *cct;
- librados::IoCtx ioctx;
- rgw::BucketChangeObserver *observer = nullptr;
- const RGWZone* zone;
-- DataLogBackends bes;
-+ std::unique_ptr<DataLogBackends> bes;
-
- const int num_shards;
- std::string get_prefix() {
- auto prefix = cct->_conf->rgw_data_log_obj_prefix;
- return prefix.empty() ? prefix : "data_log"s;
- }
-+ std::string metadata_log_oid() {
-+ return get_prefix() + "generations_metadata"s;
-+ }
- std::string prefix;
-
- ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index 55a3139d11e2b..ef2583c35b204 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -135,6 +135,8 @@ class logback_generations : public librados::WatchCtx2 {
- protected:
- const int shards;
-
-+private:
-+
- uint64_t watchcookie = 0;
-
- obj_version version;
-
-From f1e2564d952c9300dedcf017c3cf869ef6bf8ec8 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Fri, 22 Jan 2021 20:48:39 -0500
-Subject: [PATCH 14/26] rgw: Add and trim datalog generations
-
-This lets us actually change type in mid-stream.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 32b100d797cdf88648530e0162fd103cf279df31)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_admin.cc | 53 +++++++++++++++
- src/rgw/rgw_datalog.cc | 103 ++++++++++++++++++++++++++++--
- src/rgw/rgw_datalog.h | 8 +++
- src/test/cli/radosgw-admin/help.t | 1 +
- 4 files changed, 158 insertions(+), 7 deletions(-)
-
-diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
-index f0da7b9573a1c..33c8eae5725be 100644
---- a/src/rgw/rgw_admin.cc
-+++ b/src/rgw/rgw_admin.cc
-@@ -244,6 +244,7 @@ void usage()
- cout << " datalog list list data log\n";
- cout << " datalog trim trim data log\n";
- cout << " datalog status read data log status\n";
-+ cout << " datalog type change datalog type to --log_type={fifo,omap}\n";
- cout << " orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)\n";
- cout << " orphans finish deprecated -- clean up search for leaked rados objects\n";
- cout << " orphans list-jobs deprecated -- list the current job-ids for orphans search\n";
-@@ -720,6 +721,8 @@ enum class OPT {
- DATALOG_STATUS,
- DATALOG_AUTOTRIM,
- DATALOG_TRIM,
-+ DATALOG_TYPE,
-+ DATALOG_PRUNE,
- REALM_CREATE,
- REALM_DELETE,
- REALM_GET,
-@@ -930,6 +933,8 @@ static SimpleCmd::Commands all_cmds = {
- { "datalog status", OPT::DATALOG_STATUS },
- { "datalog autotrim", OPT::DATALOG_AUTOTRIM },
- { "datalog trim", OPT::DATALOG_TRIM },
-+ { "datalog type", OPT::DATALOG_TYPE },
-+ { "datalog prune", OPT::DATALOG_PRUNE },
- { "realm create", OPT::REALM_CREATE },
- { "realm delete", OPT::REALM_DELETE },
- { "realm get", OPT::REALM_GET },
-@@ -1020,6 +1025,15 @@ BIIndexType get_bi_index_type(const string& type_str) {
- return BIIndexType::Invalid;
- }
-
-+log_type get_log_type(const string& type_str) {
-+ if (strcasecmp(type_str.c_str(), "fifo") == 0)
-+ return log_type::fifo;
-+ if (strcasecmp(type_str.c_str(), "omap") == 0)
-+ return log_type::omap;
-+
-+ return static_cast<log_type>(0xff);
-+}
-+
- void dump_bi_entry(bufferlist& bl, BIIndexType index_type, Formatter *formatter)
- {
- auto iter = bl.cbegin();
-@@ -3145,6 +3159,7 @@ int main(int argc, const char **argv)
- uint64_t min_rewrite_stripe_size = 0;
-
- BIIndexType bi_index_type = BIIndexType::Plain;
-+ std::optional<log_type> opt_log_type;
-
- string job_id;
- int num_shards = 0;
-@@ -3467,6 +3482,14 @@ int main(int argc, const char **argv)
- cerr << "ERROR: invalid bucket index entry type" << std::endl;
- return EINVAL;
- }
-+ } else if (ceph_argparse_witharg(args, i, &val, "--log-type", (char*)NULL)) {
-+ string log_type_str = val;
-+ auto l = get_log_type(log_type_str);
-+ if (l == static_cast<log_type>(0xff)) {
-+ cerr << "ERROR: invalid log type" << std::endl;
-+ return EINVAL;
-+ }
-+ opt_log_type = l;
- } else if (ceph_argparse_binary_flag(args, i, &is_master_int, NULL, "--master", (char*)NULL)) {
- is_master = (bool)is_master_int;
- is_master_set = true;
-@@ -8850,6 +8873,36 @@ int main(int argc, const char **argv)
- }
- }
-
-+ if (opt_cmd == OPT::DATALOG_TYPE) {
-+ if (!opt_log_type) {
-+ std::cerr << "log-type not specified." << std::endl;
-+ return -EINVAL;
-+ }
-+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
-+ ret = datalog->change_format(*opt_log_type, null_yield);
-+ if (ret < 0) {
-+ cerr << "ERROR: change_format(): " << cpp_strerror(-ret) << std::endl;
-+ return -ret;
-+ }
-+ }
-+
-+ if (opt_cmd == OPT::DATALOG_PRUNE) {
-+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
-+ std::optional<uint64_t> through;
-+ ret = datalog->trim_generations(through);
-+
-+ if (ret < 0) {
-+ cerr << "ERROR: trim_generations(): " << cpp_strerror(-ret) << std::endl;
-+ return -ret;
-+ }
-+
-+ if (through) {
-+ std::cout << "Pruned " << *through << " empty generations." << std::endl;
-+ } else {
-+ std::cout << "No empty generations." << std::endl;
-+ }
-+ }
-+
- bool quota_op = (opt_cmd == OPT::QUOTA_SET || opt_cmd == OPT::QUOTA_ENABLE || opt_cmd == OPT::QUOTA_DISABLE);
-
- if (quota_op) {
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index c64b22d518a9f..6182ae91909e4 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -202,6 +202,29 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
- std::string_view max_marker() const override {
- return "99999999"sv;
- }
-+ int is_empty() override {
-+ for (auto shard = 0u; shard < oids.size(); ++shard) {
-+ std::list<cls_log_entry> log_entries;
-+ lr::ObjectReadOperation op;
-+ std::string out_marker;
-+ bool truncated;
-+ cls_log_list(op, {}, {}, {}, 1, log_entries, &out_marker, &truncated);
-+ auto r = rgw_rados_operate(ioctx, oids[shard], &op, nullptr, null_yield);
-+ if (r == -ENOENT) {
-+ continue;
-+ }
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__
-+ << ": failed to list " << oids[shard]
-+ << cpp_strerror(-r) << dendl;
-+ return r;
-+ }
-+ if (!log_entries.empty()) {
-+ return 0;
-+ }
-+ }
-+ return 1;
-+ }
- };
-
- class RGWDataChangesFIFO final : public RGWDataChangesBE {
-@@ -344,6 +367,24 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- rgw::cls::fifo::marker::max().to_string();
- return std::string_view(mm);
- }
-+ int is_empty() override {
-+ std::vector<rgw::cls::fifo::list_entry> log_entries;
-+ bool more = false;
-+ for (auto shard = 0u; shard < fifos.size(); ++shard) {
-+ auto r = fifos[shard]->list(1, {}, &log_entries, &more,
-+ null_yield);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__
-+ << ": unable to list FIFO: " << get_oid(shard)
-+ << ": " << cpp_strerror(-r) << dendl;
-+ return r;
-+ }
-+ if (!log_entries.empty()) {
-+ return 0;
-+ }
-+ }
-+ return 1;
-+ }
- };
-
- RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
-@@ -781,7 +822,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
-
- GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
- uint64_t head_gen, uint64_t tail_gen,
-- boost::intrusive_ptr<RGWDataChangesBE>&& be,
-+ boost::intrusive_ptr<RGWDataChangesBE> be,
- lr::AioCompletion* super)
- : Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
- cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
-@@ -792,6 +833,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- be.reset();
- if (r == -ENOENT) r = -ENODATA;
- if (r == -ENODATA && gen_id < target_gen) r = 0;
-+ r = 0;
- if (r < 0) {
- complete(std::move(p), r);
- return;
-@@ -808,7 +850,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- be = i->second;
- }
- auto c = be->gen_id == target_gen ? cursor : be->max_marker();
-- r = be->trim(shard_id, c, call(std::move(p)));
-+ be->trim(shard_id, c, call(std::move(p)));
- }
- };
-
-@@ -821,19 +863,58 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
- const auto tail_gen = begin()->first;
- if (target_gen < tail_gen) {
- l.unlock();
-- rgw_complete_aio_completion(c, 0);
-+ rgw_complete_aio_completion(c, -ENODATA);
- return;
- }
-- auto be = lower_bound(0)->second;
-+ auto be = begin()->second;
- l.unlock();
-- auto p = be.get();
- auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
- std::string(cursor), head_gen, tail_gen,
-- std::move(be), c);
-+ be, c);
-+
-+ auto cc = be->gen_id == target_gen ? cursor : be->max_marker();
-+ be->trim(shard_id, cc, GenTrim::call(std::move(gt)));
-+}
-+
-+int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
-+ if (size() == 1) {
-+ return 0;
-+ }
-
-- p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
-+ std::vector<mapped_type> candidates;
-+ {
-+ std::scoped_lock l(m);
-+ auto e = cend() - 1;
-+ for (auto i = cbegin(); i < e; ++i) {
-+ candidates.push_back(i->second);
-+ }
-+ }
-+
-+ std::optional<uint64_t> highest;
-+ for (auto& be : candidates) {
-+ auto r = be->is_empty();
-+ if (r < 0) {
-+ return r;
-+ } else if (r == 1) {
-+ highest = be->gen_id;
-+ } else {
-+ break;
-+ }
-+ }
-+
-+ through = highest;
-+ if (!highest) {
-+ return 0;
-+ }
-+ auto ec = empty_to(*highest, null_yield);
-+ if (ec) {
-+ return ceph::from_error_code(ec);
-+ }
-+
-+ return ceph::from_error_code(remove_empty(null_yield));
- }
-
-+
- int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
- librados::AioCompletion* c)
- {
-@@ -897,3 +978,11 @@ std::string RGWDataChangesLog::max_marker() const {
- return gencursor(std::numeric_limits<uint64_t>::max(),
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
- }
-+
-+int RGWDataChangesLog::change_format(log_type type, optional_yield y) {
-+ return ceph::from_error_code(bes->new_backing(type, y));
-+}
-+
-+int RGWDataChangesLog::trim_generations(std::optional<uint64_t>& through) {
-+ return bes->trim_generations(through);
-+}
-diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
-index e9a768d546c00..5886d51dac174 100644
---- a/src/rgw/rgw_datalog.h
-+++ b/src/rgw/rgw_datalog.h
-@@ -160,6 +160,8 @@ class DataLogBackends final
- bs::error_code handle_init(entries_t e) noexcept override;
- bs::error_code handle_new_gens(entries_t e) noexcept override;
- bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
-+
-+ int trim_generations(std::optional<uint64_t>& through);
- };
-
- class RGWDataChangesLog {
-@@ -262,6 +264,10 @@ class RGWDataChangesLog {
- // a marker that compares greater than any other
- std::string max_marker() const;
- std::string get_oid(uint64_t gen_id, int shard_id) const;
-+
-+
-+ int change_format(log_type type, optional_yield y);
-+ int trim_generations(std::optional<uint64_t>& through);
- };
-
- class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
-@@ -303,6 +309,8 @@ class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
- virtual int trim(int index, std::string_view marker,
- librados::AioCompletion* c) = 0;
- virtual std::string_view max_marker() const = 0;
-+ // 1 on empty, 0 on non-empty, negative on error.
-+ virtual int is_empty() = 0;
- };
-
-
-diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
-index 490499f24a029..c63c63cb55e2c 100644
---- a/src/test/cli/radosgw-admin/help.t
-+++ b/src/test/cli/radosgw-admin/help.t
-@@ -138,6 +138,7 @@
- datalog list list data log
- datalog trim trim data log
- datalog status read data log status
-+ datalog type change datalog type to --log_type={fifo,omap}
- orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)
- orphans finish deprecated -- clean up search for leaked rados objects
- orphans list-jobs deprecated -- list the current job-ids for orphans search
-
-From 176b7f12bc45f17c610bcbec29d58078b32592b9 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 26 Jan 2021 12:24:41 -0500
-Subject: [PATCH 15/26] cls/fifo: Don't error in the log if we're being probed
- for existence
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 4a2575783a050f27b22b7bfe4364520bf29fc6a5)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/cls/fifo/cls_fifo.cc | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/src/cls/fifo/cls_fifo.cc b/src/cls/fifo/cls_fifo.cc
-index db936078e8c3d..fc89a20e6b2bf 100644
---- a/src/cls/fifo/cls_fifo.cc
-+++ b/src/cls/fifo/cls_fifo.cc
-@@ -162,7 +162,7 @@ int write_part_header(cls_method_context_t hctx,
-
- int read_header(cls_method_context_t hctx,
- std::optional<objv> objv,
-- info* info)
-+ info* info, bool get_info = false)
- {
- std::uint64_t size;
-
-@@ -180,7 +180,11 @@ int read_header(cls_method_context_t hctx,
- }
-
- if (r == 0) {
-- CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
-+ if (get_info) {
-+ CLS_LOG(5, "%s: Zero length object, likely probe, returning ENODATA", __PRETTY_FUNCTION__);
-+ } else {
-+ CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
-+ }
- return -ENODATA;
- }
-
-@@ -366,7 +370,7 @@ int get_meta(cls_method_context_t hctx, ceph::buffer::list* in,
- }
-
- op::get_meta_reply reply;
-- int r = read_header(hctx, op.version, &reply.info);
-+ int r = read_header(hctx, op.version, &reply.info, true);
- if (r < 0) {
- return r;
- }
-
-From f70374f71fe4e715f6221d34aee268ed601b17b8 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 19:34:07 -0500
-Subject: [PATCH 16/26] rgw: Add LazyFIFO to keep from blasting an op-per-shard
- on startup
-
-LazyFIFO opens the FIFO on first access.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 1cc4a0a4e274700b4ae044db125a8cb3a64253a2)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.h | 135 ++++++++++++++++++++++++++++++++++++++
- 1 file changed, 135 insertions(+)
-
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index ef2583c35b204..cd677764c5795 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -32,6 +32,8 @@
- namespace bc = boost::container;
- namespace bs = boost::system;
-
-+#include "cls_fifo_legacy.h"
-+
- /// Type of log backing, stored in the mark used in the quick check,
- /// and passed to checking functions.
- enum class log_type {
-@@ -255,4 +257,137 @@ cursorgeno(std::optional<std::string_view> cursor) {
- }
- }
-
-+class LazyFIFO {
-+ librados::IoCtx& ioctx;
-+ std::string oid;
-+ std::mutex m;
-+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-+
-+ int lazy_init(optional_yield y) {
-+ std::unique_lock l(m);
-+ if (fifo) return 0;
-+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid, &fifo, y);
-+ if (r) {
-+ fifo.reset();
-+ }
-+ return r;
-+ }
-+
-+public:
-+
-+ LazyFIFO(librados::IoCtx& ioctx, std::string oid)
-+ : ioctx(ioctx), oid(std::move(oid)) {}
-+
-+ int read_meta(optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->read_meta(y);
-+ }
-+
-+ int meta(rados::cls::fifo::info& info, optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ info = fifo->meta();
-+ return 0;
-+ }
-+
-+ int get_part_layout_info(std::uint32_t& part_header_size,
-+ std::uint32_t& part_entry_overhead,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ std::tie(part_header_size, part_entry_overhead)
-+ = fifo->get_part_layout_info();
-+ return 0;
-+ }
-+
-+ int push(const ceph::buffer::list& bl,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->push(bl, y);
-+ }
-+
-+ int push(ceph::buffer::list& bl,
-+ librados::AioCompletion* c,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->push(bl, c);
-+ return 0;
-+ }
-+
-+ int push(const std::vector<ceph::buffer::list>& data_bufs,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->push(data_bufs, y);
-+ }
-+
-+ int push(const std::vector<ceph::buffer::list>& data_bufs,
-+ librados::AioCompletion* c,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->push(data_bufs, c);
-+ return 0;
-+ }
-+
-+ int list(int max_entries, std::optional<std::string_view> markstr,
-+ std::vector<rgw::cls::fifo::list_entry>* out,
-+ bool* more, optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->list(max_entries, markstr, out, more, y);
-+ }
-+
-+ int list(int max_entries, std::optional<std::string_view> markstr,
-+ std::vector<rgw::cls::fifo::list_entry>* out, bool* more,
-+ librados::AioCompletion* c, optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->list(max_entries, markstr, out, more, c);
-+ return 0;
-+ }
-+
-+ int trim(std::string_view markstr, bool exclusive, optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->trim(markstr, exclusive, y);
-+ }
-+
-+ int trim(std::string_view markstr, bool exclusive, librados::AioCompletion* c,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->trim(markstr, exclusive, c);
-+ return 0;
-+ }
-+
-+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ return fifo->get_part_info(part_num, header, y);
-+ }
-+
-+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
-+ librados::AioCompletion* c, optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->get_part_info(part_num, header, c);
-+ return 0;
-+ }
-+
-+ int get_head_info(fu2::unique_function<
-+ void(int r, rados::cls::fifo::part_header&&)>&& f,
-+ librados::AioCompletion* c,
-+ optional_yield y) {
-+ auto r = lazy_init(y);
-+ if (r < 0) return r;
-+ fifo->get_head_info(std::move(f), c);
-+ return 0;
-+ }
-+};
-+
- #endif
-
-From ce249836e01aacd8024584be666455c299d38172 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Sat, 21 Nov 2020 23:06:38 -0500
-Subject: [PATCH 17/26] rgw: Use LazyFIFO in data changes log
-
-That way we don't start sending ops to open a FIFO until we need it.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 12939a258f8c627d1b7b23c0b9d7c22e98e69d89)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 47 ++++++++++++++++++------------------------
- 1 file changed, 20 insertions(+), 27 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 6182ae91909e4..3ecab432646c1 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -4,6 +4,7 @@
- #include <vector>
-
- #include "common/debug.h"
-+#include "common/containers.h"
- #include "common/errno.h"
- #include "common/error_code.h"
-
-@@ -24,6 +25,8 @@ static constexpr auto dout_subsys = ceph_subsys_rgw;
- namespace bs = boost::system;
- namespace lr = librados;
-
-+using ceph::containers::tiny_vector;
-+
- void rgw_data_change::dump(ceph::Formatter *f) const
- {
- std::string type;
-@@ -229,27 +232,16 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
-
- class RGWDataChangesFIFO final : public RGWDataChangesBE {
- using centries = std::vector<ceph::buffer::list>;
-- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
-+ tiny_vector<LazyFIFO> fifos;
-
- public:
- RGWDataChangesFIFO(lr::IoCtx& ioctx,
- RGWDataChangesLog& datalog,
- uint64_t gen_id, int shards)
-- : RGWDataChangesBE(ioctx, datalog, gen_id) {
-- fifos.resize(shards);
-- for (auto i = 0; i < shards; ++i) {
-- auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
-- &fifos[i], null_yield);
-- if (r < 0) {
-- throw bs::system_error(ceph::to_error_code(r));
-- }
-- }
-- ceph_assert(fifos.size() == unsigned(shards));
-- ceph_assert(std::none_of(fifos.cbegin(), fifos.cend(),
-- [](const auto& p) {
-- return p == nullptr;
-- }));
-- }
-+ : RGWDataChangesBE(ioctx, datalog, gen_id),
-+ fifos(shards, [&ioctx, this](std::size_t i, auto emplacer) {
-+ emplacer.emplace(ioctx, get_oid(i));
-+ }) {}
- ~RGWDataChangesFIFO() override = default;
- void prepare(ceph::real_time, const std::string&,
- ceph::buffer::list&& entry, entries& out) override {
-@@ -260,7 +252,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- std::get<centries>(out).push_back(std::move(entry));
- }
- int push(int index, entries&& items) override {
-- auto r = fifos[index]->push(std::get<centries>(items), null_yield);
-+ auto r = fifos[index].push(std::get<centries>(items), null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to push to FIFO: " << get_oid(index)
-@@ -271,7 +263,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- int push(int index, ceph::real_time,
- const std::string&,
- ceph::buffer::list&& bl) override {
-- auto r = fifos[index]->push(std::move(bl), null_yield);
-+ auto r = fifos[index].push(std::move(bl), null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to push to FIFO: " << get_oid(index)
-@@ -285,8 +277,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- std::string* out_marker, bool* truncated) override {
- std::vector<rgw::cls::fifo::list_entry> log_entries;
- bool more = false;
-- auto r = fifos[index]->list(max_entries, marker, &log_entries, &more,
-- null_yield);
-+ auto r = fifos[index].list(max_entries, marker, &log_entries, &more,
-+ null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to list FIFO: " << get_oid(index)
-@@ -317,14 +309,15 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- }
- int get_info(int index, RGWDataChangesLogInfo *info) override {
- auto& fifo = fifos[index];
-- auto r = fifo->read_meta(null_yield);
-+ auto r = fifo.read_meta(null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to get FIFO metadata: " << get_oid(index)
- << ": " << cpp_strerror(-r) << dendl;
- return r;
- }
-- auto m = fifo->meta();
-+ rados::cls::fifo::info m;
-+ fifo.meta(m, null_yield);
- auto p = m.head_part_num;
- if (p < 0) {
- info->marker = rgw::cls::fifo::marker{}.to_string();
-@@ -332,7 +325,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- return 0;
- }
- rgw::cls::fifo::part_info h;
-- r = fifo->get_part_info(p, &h, null_yield);
-+ r = fifo.get_part_info(p, &h, null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to get part info: " << get_oid(index) << "/" << p
-@@ -344,7 +337,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- return 0;
- }
- int trim(int index, std::string_view marker) override {
-- auto r = fifos[index]->trim(marker, false, null_yield);
-+ auto r = fifos[index].trim(marker, false, null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to trim FIFO: " << get_oid(index)
-@@ -358,7 +351,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
- rgw_complete_aio_completion(c, -ENODATA);
- } else {
-- fifos[index]->trim(marker, false, c);
-+ fifos[index].trim(marker, false, c, null_yield);
- }
- return r;
- }
-@@ -371,8 +364,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- std::vector<rgw::cls::fifo::list_entry> log_entries;
- bool more = false;
- for (auto shard = 0u; shard < fifos.size(); ++shard) {
-- auto r = fifos[shard]->list(1, {}, &log_entries, &more,
-- null_yield);
-+ auto r = fifos[shard].list(1, {}, &log_entries, &more,
-+ null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to list FIFO: " << get_oid(shard)
-
-From ad5a2fadf0fb16d4fc3066811fe11fc53c868263 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 26 Jan 2021 20:30:58 -0500
-Subject: [PATCH 18/26] rgw: Prune datalog generations in the renew loop
-
-Every 150 times through, which is a bit less than an hour between runs
-by default.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 8f4291291b0dea4b4701894da0775149266a1373)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-
-Conflicts:
- src/rgw/rgw_datalog.cc
----
- src/rgw/rgw_datalog.cc | 21 +++++++++++++++++++++
- 1 file changed, 21 insertions(+)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 3ecab432646c1..d81d955ef6f17 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -930,6 +930,8 @@ RGWDataChangesLog::~RGWDataChangesLog() {
- }
-
- void RGWDataChangesLog::renew_run() {
-+ static constexpr auto runs_per_prune = 150;
-+ auto run = 0;
- for (;;) {
- dout(2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl;
- int r = renew_entries();
-@@ -940,6 +942,25 @@ void RGWDataChangesLog::renew_run() {
- if (going_down())
- break;
-
-+ if (run == runs_per_prune) {
-+ std::optional<uint64_t> through;
-+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruning old generations" << dendl;
-+ trim_generations(through);
-+ if (r < 0) {
-+ derr << "RGWDataChangesLog::ChangesRenewThread: failed pruning r="
-+ << r << dendl;
-+ } else if (through) {
-+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruned generations "
-+ << "through " << *through << "." << dendl;
-+ } else {
-+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: nothing to prune."
-+ << dendl;
-+ }
-+ run = 0;
-+ } else {
-+ ++run;
-+ }
-+
- int interval = cct->_conf->rgw_data_log_window * 3 / 4;
- std::unique_lock locker{renew_lock};
- renew_cond.wait_for(locker, std::chrono::seconds(interval));
-
-From 0a2bee7e18367fbb1be73ece26e1a6efb099c161 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 2 Feb 2021 14:09:52 -0500
-Subject: [PATCH 19/26] rgw: Fix cursor handling in DataLogBackends::list
-
-Don't assume that the lowest generation not greater than the requested
-generation actually is the requested generation.
-
-(Also don't hold the lock after we get a backend.)
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit d7739178e994ce84886d297a29f2250e4bd78daa)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 15 +++++++++++----
- 1 file changed, 11 insertions(+), 4 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index d81d955ef6f17..1db5eb86d62e1 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -704,7 +704,8 @@ int DataLogBackends::list(int shard, int max_entries,
- std::optional<std::string_view> marker,
- std::string* out_marker, bool* truncated)
- {
-- auto [gen_id, cursor] = cursorgeno(marker);
-+ const auto [start_id, start_cursor] = cursorgeno(marker);
-+ auto gen_id = start_id;
- std::string out_cursor;
- while (max_entries > 0) {
- std::vector<rgw_data_change_log_entry> gentries;
-@@ -712,7 +713,10 @@ int DataLogBackends::list(int shard, int max_entries,
- auto i = lower_bound(gen_id);
- if (i == end()) return 0;
- auto be = i->second;
-- auto r = be->list(shard, max_entries, gentries, cursor,
-+ l.unlock();
-+ gen_id = be->gen_id;
-+ auto r = be->list(shard, max_entries, gentries,
-+ gen_id == start_id ? start_cursor : std::string{},
- &out_cursor, truncated);
- if (r < 0)
- return r;
-@@ -723,10 +727,13 @@ int DataLogBackends::list(int shard, int max_entries,
- for (auto& g : gentries) {
- g.log_id = gencursor(gen_id, g.log_id);
- }
-- max_entries -= gentries.size();
-+ if (gentries.size() > max_entries)
-+ max_entries = 0;
-+ else
-+ max_entries -= gentries.size();
-+
- std::move(gentries.begin(), gentries.end(),
- std::back_inserter(entries));
-- cursor = {};
- ++gen_id;
- }
- return 0;
-
-From 4a6a7b3900ca4d1e14423d1ac07a0be60edb0ad0 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Thu, 4 Feb 2021 15:48:56 -0500
-Subject: [PATCH 20/26] rgw: Don't swallow errors in datalog async trim
-
-Typo and misleading indentation.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit e97de55f46bbe67b523abfb4c30c50f1547f2601)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 16 ++++++++++------
- 1 file changed, 10 insertions(+), 6 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 1db5eb86d62e1..0b68c45a13e8d 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -798,7 +801,10 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
- l.unlock();
- auto c = be->gen_id == target_gen ? cursor : be->max_marker();
- r = be->trim(shard_id, c);
-- if (r == -ENODATA && be->gen_id < target_gen) r = 0;
-+ if (r == -ENOENT)
-+ r = -ENODATA;
-+ if (r == -ENODATA && be->gen_id < target_gen)
-+ r = 0;
- l.lock();
- };
- return r;
-@@ -820,8 +823,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- const uint64_t tail_gen;
- boost::intrusive_ptr<RGWDataChangesBE> be;
-
-- GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
-- uint64_t head_gen, uint64_t tail_gen,
-+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen,
-+ std::string cursor, uint64_t head_gen, uint64_t tail_gen,
- boost::intrusive_ptr<RGWDataChangesBE> be,
- lr::AioCompletion* super)
- : Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
-@@ -831,8 +834,9 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- void handle(Ptr&& p, int r) {
- auto gen_id = be->gen_id;
- be.reset();
-- if (r == -ENOENT) r = -ENODATA;
-- if (r == -ENODATA && gen_id < target_gen) r = 0;
-+ if (r == -ENOENT)
-+ r = -ENODATA;
-+ if (r == -ENODATA && gen_id < target_gen)
- r = 0;
- if (r < 0) {
- complete(std::move(p), r);
-@@ -844,7 +848,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
- auto i = bes->upper_bound(gen_id);
- if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
- l.unlock();
-- complete(std::move(p), r);
-+ complete(std::move(p), -ENODATA);
- return;
- }
- be = i->second;
-
-From 262466609208e81f8fe54560fd07a81a4b78cd68 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Tue, 9 Feb 2021 18:10:50 -0500
-Subject: [PATCH 21/26] rgw: Leave the zero'th shard of the zero'th generation
- for cls_lock
-
-Since data sync locks that object, instead of deleting it, truncate
-the object and clear the omap.
-
-(cls_lock uses xattrs.)
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 0d4e0abb8a699417ea75a6cd390786189ab964eb)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.cc | 16 +++++++++++++---
- src/rgw/rgw_log_backing.h | 1 +
- 2 files changed, 14 insertions(+), 3 deletions(-)
-
-diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
-index eab60e672b9e8..67fc925586919 100644
---- a/src/rgw/rgw_log_backing.cc
-+++ b/src/rgw/rgw_log_backing.cc
-@@ -168,6 +168,7 @@ log_backing_type(librados::IoCtx& ioctx,
- bs::error_code log_remove(librados::IoCtx& ioctx,
- int shards,
- const fu2::unique_function<std::string(int) const>& get_oid,
-+ bool leave_zero,
- optional_yield y)
- {
- bs::error_code ec;
-@@ -204,7 +205,16 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
- << ", r=" << r << dendl;
- }
- librados::ObjectWriteOperation op;
-- op.remove();
-+ if (i == 0 && leave_zero) {
-+ // Leave shard 0 in existence, but remove contents and
-+ // omap. cls_lock stores things in the xattrs. And sync needs to
-+ // rendezvous with locks on generation 0 shard 0.
-+ op.omap_set_header({});
-+ op.omap_clear();
-+ op.truncate(0);
-+ } else {
-+ op.remove();
-+ }
- r = rgw_rados_operate(ioctx, oid, &op, null_yield);
- if (r < 0 && r != -ENOENT) {
- if (!ec)
-@@ -291,7 +301,7 @@ bs::error_code logback_generations::setup(log_type def,
- auto ec = log_remove(ioctx, shards,
- [this](int shard) {
- return this->get_oid(0, shard);
-- }, y);
-+ }, true, y);
- if (ec) return ec;
- }
- std::unique_lock lock(m);
-@@ -631,7 +641,7 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
- auto ec = log_remove(ioctx, shards,
- [this, gen_id](int shard) {
- return this->get_oid(gen_id, shard);
-- }, y);
-+ }, (gen_id == 0), y);
- if (ec) {
- return ec;
- }
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index cd677764c5795..e592bc29b2bcf 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -88,6 +88,7 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
- /// A function taking a shard number and
- /// returning an oid.
- const fu2::unique_function<std::string(int) const>& get_oid,
-+ bool leave_zero,
- optional_yield y);
-
-
-
-From 497c4231beec9caa79d815d571f511040784bbb8 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Wed, 10 Feb 2021 16:18:09 -0500
-Subject: [PATCH 22/26] rgw: Wait until a generation has been empty for an hour
- to delete
-
-This fixes a problem where, while the backing handle remains allocated
-while a call completes, the objects it depends on may be deleted
-behind it.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 7018c25d47edf7e12b581f7f28c2549fe73bde15)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 2 +-
- src/rgw/rgw_log_backing.cc | 37 +++++++++++++++++++++-----------
- src/rgw/rgw_log_backing.h | 8 +++----
- src/test/rgw/test_log_backing.cc | 24 ++++++++++-----------
- 4 files changed, 42 insertions(+), 29 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 0b68c45a13e8d..184d0713fb2a9 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -390,7 +390,7 @@ bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
- std::unique_lock l(m);
-
- for (const auto& [gen_id, gen] : e) {
-- if (gen.empty) {
-+ if (gen.pruned) {
- lderr(datalog.cct)
- << __PRETTY_FUNCTION__ << ":" << __LINE__
- << ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
-diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
-index 67fc925586919..8ce88aa21414f 100644
---- a/src/rgw/rgw_log_backing.cc
-+++ b/src/rgw/rgw_log_backing.cc
-@@ -583,7 +583,7 @@ bs::error_code logback_generations::empty_to(uint64_t gen_id,
- }
- for (auto i = es.begin(); i < ei; ++i) {
- newtail = i->first;
-- i->second.empty = true;
-+ i->second.pruned = ceph::real_clock::now();
- }
- ec = write(std::move(es), std::move(l), y);
- ++tries;
-@@ -626,31 +626,44 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
- entries_t new_entries;
- std::unique_lock l(m);
- ceph_assert(!entries_.empty());
-- auto i = lowest_nomempty(entries_);
-- if (i == entries_.begin()) {
-- return {};
-+ {
-+ auto i = lowest_nomempty(entries_);
-+ if (i == entries_.begin()) {
-+ return {};
-+ }
- }
-- auto ln = i->first;
- entries_t es;
-- std::copy(entries_.cbegin(), i,
-- std::inserter(es, es.end()));
-+ auto now = ceph::real_clock::now();
- l.unlock();
- do {
-+ std::copy_if(entries_.cbegin(), entries_.cend(),
-+ std::inserter(es, es.end()),
-+ [now](const auto& e) {
-+ if (!e.second.pruned)
-+ return false;
-+
-+ auto pruned = *e.second.pruned;
-+ return (now - pruned) >= 1h;
-+ });
-+ auto es2 = entries_;
- for (const auto& [gen_id, e] : es) {
-- ceph_assert(e.empty);
-+ ceph_assert(e.pruned);
- auto ec = log_remove(ioctx, shards,
- [this, gen_id](int shard) {
- return this->get_oid(gen_id, shard);
- }, (gen_id == 0), y);
- if (ec) {
-- return ec;
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": Error pruning: gen_id=" << gen_id
-+ << " ec=" << ec.message() << dendl;
-+ }
-+ if (auto i = es2.find(gen_id); i != es2.end()) {
-+ es2.erase(i);
- }
- }
- l.lock();
-- i = entries_.find(ln);
- es.clear();
-- std::copy(i, entries_.cend(), std::inserter(es, es.end()));
-- ec = write(std::move(es), std::move(l), y);
-+ ec = write(std::move(es2), std::move(l), y);
- ++tries;
- } while (ec == bs::errc::operation_canceled &&
- tries < max_tries);
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index e592bc29b2bcf..d5996049e5873 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -95,13 +95,13 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
- struct logback_generation {
- uint64_t gen_id = 0;
- log_type type;
-- bool empty = false;
-+ std::optional<ceph::real_time> pruned;
-
- void encode(ceph::buffer::list& bl) const {
- ENCODE_START(1, 1, bl);
- encode(gen_id, bl);
- encode(type, bl);
-- encode(empty, bl);
-+ encode(pruned, bl);
- ENCODE_FINISH(bl);
- }
-
-@@ -109,7 +109,7 @@ struct logback_generation {
- DECODE_START(1, bl);
- decode(gen_id, bl);
- decode(type, bl);
-- decode(empty, bl);
-+ decode(pruned, bl);
- DECODE_FINISH(bl);
- }
- };
-@@ -157,7 +157,7 @@ class logback_generations : public librados::WatchCtx2 {
- auto lowest_nomempty(const entries_t& es) {
- return std::find_if(es.begin(), es.end(),
- [](const auto& e) {
-- return !e.second.empty;
-+ return !e.second.pruned;
- });
- }
-
-diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
-index 166de2dd8242c..95f1e613936b0 100644
---- a/src/test/rgw/test_log_backing.cc
-+++ b/src/test/rgw/test_log_backing.cc
-@@ -241,7 +241,7 @@ TEST_F(LogBacking, GenerationSingle)
-
- ASSERT_EQ(0, lg->got_entries[0].gen_id);
- ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-- ASSERT_FALSE(lg->got_entries[0].empty);
-+ ASSERT_FALSE(lg->got_entries[0].pruned);
-
- auto ec = lg->empty_to(0, null_yield);
- ASSERT_TRUE(ec);
-@@ -258,7 +258,7 @@ TEST_F(LogBacking, GenerationSingle)
-
- ASSERT_EQ(0, lg->got_entries[0].gen_id);
- ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-- ASSERT_FALSE(lg->got_entries[0].empty);
-+ ASSERT_FALSE(lg->got_entries[0].pruned);
-
- lg->got_entries.clear();
-
-@@ -268,7 +268,7 @@ TEST_F(LogBacking, GenerationSingle)
- ASSERT_EQ(1, lg->got_entries.size());
- ASSERT_EQ(1, lg->got_entries[1].gen_id);
- ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-- ASSERT_FALSE(lg->got_entries[1].empty);
-+ ASSERT_FALSE(lg->got_entries[1].pruned);
-
- lg.reset();
-
-@@ -280,11 +280,11 @@ TEST_F(LogBacking, GenerationSingle)
- ASSERT_EQ(2, lg->got_entries.size());
- ASSERT_EQ(0, lg->got_entries[0].gen_id);
- ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
-- ASSERT_FALSE(lg->got_entries[0].empty);
-+ ASSERT_FALSE(lg->got_entries[0].pruned);
-
- ASSERT_EQ(1, lg->got_entries[1].gen_id);
- ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-- ASSERT_FALSE(lg->got_entries[1].empty);
-+ ASSERT_FALSE(lg->got_entries[1].pruned);
-
- ec = lg->empty_to(0, null_yield);
- ASSERT_FALSE(ec);
-@@ -301,7 +301,7 @@ TEST_F(LogBacking, GenerationSingle)
- ASSERT_EQ(1, lg->got_entries.size());
- ASSERT_EQ(1, lg->got_entries[1].gen_id);
- ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
-- ASSERT_FALSE(lg->got_entries[1].empty);
-+ ASSERT_FALSE(lg->got_entries[1].pruned);
-
- ec = lg->remove_empty(null_yield);
- ASSERT_FALSE(ec);
-@@ -311,7 +311,7 @@ TEST_F(LogBacking, GenerationSingle)
-
- ASSERT_EQ(1, entries[1].gen_id);
- ASSERT_EQ(log_type::omap, entries[1].type);
-- ASSERT_FALSE(entries[1].empty);
-+ ASSERT_FALSE(entries[1].pruned);
-
- lg.reset();
- }
-@@ -329,7 +329,7 @@ TEST_F(LogBacking, GenerationWN)
- ASSERT_EQ(1, lg1->got_entries.size());
- ASSERT_EQ(1, lg1->got_entries[1].gen_id);
- ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
-- ASSERT_FALSE(lg1->got_entries[1].empty);
-+ ASSERT_FALSE(lg1->got_entries[1].pruned);
-
- lg1->got_entries.clear();
-
-@@ -342,11 +342,11 @@ TEST_F(LogBacking, GenerationWN)
-
- ASSERT_EQ(0, lg2->got_entries[0].gen_id);
- ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
-- ASSERT_FALSE(lg2->got_entries[0].empty);
-+ ASSERT_FALSE(lg2->got_entries[0].pruned);
-
- ASSERT_EQ(1, lg2->got_entries[1].gen_id);
- ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
-- ASSERT_FALSE(lg2->got_entries[1].empty);
-+ ASSERT_FALSE(lg2->got_entries[1].pruned);
-
- lg2->got_entries.clear();
-
-@@ -356,12 +356,12 @@ TEST_F(LogBacking, GenerationWN)
- ASSERT_EQ(1, lg1->got_entries.size());
- ASSERT_EQ(2, lg1->got_entries[2].gen_id);
- ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
-- ASSERT_FALSE(lg1->got_entries[2].empty);
-+ ASSERT_FALSE(lg1->got_entries[2].pruned);
-
- ASSERT_EQ(1, lg2->got_entries.size());
- ASSERT_EQ(2, lg2->got_entries[2].gen_id);
- ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
-- ASSERT_FALSE(lg2->got_entries[2].empty);
-+ ASSERT_FALSE(lg2->got_entries[2].pruned);
-
- lg1->got_entries.clear();
- lg2->got_entries.clear();
-
-From 73d6d04e7c8984ed00c82e93abcab58af81fe664 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Wed, 10 Feb 2021 17:09:02 -0500
-Subject: [PATCH 23/26] rgw: Try to prune empties even if no empties found
-
-Since we won't actually delete empties until much later.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 9bd9b7659fdb7a1a01d5e1523f0d461dbf5eaafe)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 54 ++++++++++++++++++++----------------------
- 1 file changed, 26 insertions(+), 28 deletions(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 184d0713fb2a9..93a27a5639d05 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -881,38 +881,36 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
- }
-
- int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
-- if (size() == 1) {
-- return 0;
-- }
--
-- std::vector<mapped_type> candidates;
-- {
-- std::scoped_lock l(m);
-- auto e = cend() - 1;
-- for (auto i = cbegin(); i < e; ++i) {
-- candidates.push_back(i->second);
-+ if (size() != 1) {
-+ std::vector<mapped_type> candidates;
-+ {
-+ std::scoped_lock l(m);
-+ auto e = cend() - 1;
-+ for (auto i = cbegin(); i < e; ++i) {
-+ candidates.push_back(i->second);
-+ }
- }
-- }
-
-- std::optional<uint64_t> highest;
-- for (auto& be : candidates) {
-- auto r = be->is_empty();
-- if (r < 0) {
-- return r;
-- } else if (r == 1) {
-- highest = be->gen_id;
-- } else {
-- break;
-+ std::optional<uint64_t> highest;
-+ for (auto& be : candidates) {
-+ auto r = be->is_empty();
-+ if (r < 0) {
-+ return r;
-+ } else if (r == 1) {
-+ highest = be->gen_id;
-+ } else {
-+ break;
-+ }
- }
-- }
-
-- through = highest;
-- if (!highest) {
-- return 0;
-- }
-- auto ec = empty_to(*highest, null_yield);
-- if (ec) {
-- return ceph::from_error_code(ec);
-+ through = highest;
-+ if (!highest) {
-+ return 0;
-+ }
-+ auto ec = empty_to(*highest, null_yield);
-+ if (ec) {
-+ return ceph::from_error_code(ec);
-+ }
- }
-
- return ceph::from_error_code(remove_empty(null_yield));
-
-From 7e80b7403878b3c13d62f2f9bfe9c3c13a266500 Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Thu, 11 Feb 2021 18:27:33 -0500
-Subject: [PATCH 24/26] rgw: Make empty datalog fifo markers empty strings
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 4e3a7d5476fa2dd4b9825f4d546c42819f93c7cc)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_datalog.cc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
-index 93a27a5639d05..cb5cba7269fb1 100644
---- a/src/rgw/rgw_datalog.cc
-+++ b/src/rgw/rgw_datalog.cc
-@@ -320,7 +320,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
- fifo.meta(m, null_yield);
- auto p = m.head_part_num;
- if (p < 0) {
-- info->marker = rgw::cls::fifo::marker{}.to_string();
-+ info->marker = ""s;
- info->last_update = ceph::real_clock::zero();
- return 0;
- }
-
-From c3039ccdafe8350c29f18fbfdd79b096cb1f0a0d Mon Sep 17 00:00:00 2001
-From: "Adam C. Emerson" <aemerson(a)redhat.com>
-Date: Mon, 8 Mar 2021 15:17:53 -0500
-Subject: [PATCH 25/26] rgw: Fix probe failure on OSDs not supporting FIFO.
-
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
-(cherry picked from commit 4e9ec426b15fe60c5b0154980f808076e166dd02)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.cc | 64 ++++++++++++++++++++++++--------------
- 1 file changed, 40 insertions(+), 24 deletions(-)
-
-diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
-index 8ce88aa21414f..c3037e13048bb 100644
---- a/src/rgw/rgw_log_backing.cc
-+++ b/src/rgw/rgw_log_backing.cc
-@@ -31,7 +31,8 @@ inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
- namespace {
- /// Return the shard type, and a bool to see whether it has entries.
- std::pair<shard_check, bool>
--probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
-+probe_shard(librados::IoCtx& ioctx, const std::string& oid,
-+ bool& fifo_unsupported, optional_yield y)
- {
- auto cct = static_cast<CephContext*>(ioctx.cct());
- bool omap = false;
-@@ -53,32 +54,38 @@ probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
- if (header != cls_log_header{})
- omap = true;
- }
-- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
-- &fifo, y,
-- std::nullopt, true);
-- if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " error probing for fifo: r=" << r
-- << ", oid=" << oid << dendl;
-- return { shard_check::corrupt, {} };
-- }
-- if (fifo && omap) {
-- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << " fifo and omap found: oid=" << oid << dendl;
-- return { shard_check::corrupt, {} };
-- }
-- if (fifo) {
-- bool more = false;
-- std::vector<rgw::cls::fifo::list_entry> entries;
-- r = fifo->list(1, nullopt, &entries, &more, y);
-- if (r < 0) {
-+ if (!fifo_unsupported) {
-+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
-+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
-+ &fifo, y,
-+ std::nullopt, true);
-+ if (r < 0 && !(r == -ENOENT || r == -ENODATA || r == -EPERM)) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-- << ": unable to list entries: r=" << r
-+ << " error probing for fifo: r=" << r
- << ", oid=" << oid << dendl;
- return { shard_check::corrupt, {} };
- }
-- return { shard_check::fifo, !entries.empty() };
-+ if (fifo && omap) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " fifo and omap found: oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ if (fifo) {
-+ bool more = false;
-+ std::vector<rgw::cls::fifo::list_entry> entries;
-+ r = fifo->list(1, nullopt, &entries, &more, y);
-+ if (r < 0) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << ": unable to list entries: r=" << r
-+ << ", oid=" << oid << dendl;
-+ return { shard_check::corrupt, {} };
-+ }
-+ return { shard_check::fifo, !entries.empty() };
-+ }
-+ if (r == -EPERM) {
-+ // Returned by OSD if CLS module not loaded.
-+ fifo_unsupported = true;
-+ }
- }
- if (omap) {
- std::list<cls_log_entry> entries;
-@@ -105,10 +112,17 @@ tl::expected<log_type, bs::error_code>
- handle_dne(librados::IoCtx& ioctx,
- log_type def,
- std::string oid,
-+ bool fifo_unsupported,
- optional_yield y)
- {
- auto cct = static_cast<CephContext*>(ioctx.cct());
- if (def == log_type::fifo) {
-+ if (fifo_unsupported) {
-+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
-+ << " WARNING: FIFO set as default but not supported by OSD. "
-+ << "Falling back to OMAP." << dendl;
-+ return log_type::omap;
-+ }
- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
- auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
- &fifo, y,
-@@ -133,8 +147,9 @@ log_backing_type(librados::IoCtx& ioctx,
- {
- auto cct = static_cast<CephContext*>(ioctx.cct());
- auto check = shard_check::dne;
-+ bool fifo_unsupported = false;
- for (int i = 0; i < shards; ++i) {
-- auto [c, e] = probe_shard(ioctx, get_oid(i), y);
-+ auto [c, e] = probe_shard(ioctx, get_oid(i), fifo_unsupported, y);
- if (c == shard_check::corrupt)
- return tl::unexpected(bs::error_code(EIO, bs::system_category()));
- if (c == shard_check::dne) continue;
-@@ -160,6 +175,7 @@ log_backing_type(librados::IoCtx& ioctx,
- return handle_dne(ioctx,
- def,
- get_oid(0),
-+ fifo_unsupported,
- y);
-
- return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
-
-From 9fcde9e37bb1e954ef837d12ba03387d63d4b020 Mon Sep 17 00:00:00 2001
-From: Yuval Lifshitz <ylifshit(a)redhat.com>
-Date: Sun, 4 Apr 2021 17:19:03 +0300
-Subject: [PATCH 26/26] rgw/multisite: handle case when empty marker is
- provided
-
-marker is optional, however, it may also be provided empty
-
-Fixes: https://tracker.ceph.com/issues/50135
-
-Signed-off-by: Yuval Lifshitz <ylifshit(a)redhat.com>
-(cherry picked from commit fccf75eee3750a3654d2a2b1e3aa379edcfd8c8d)
-Signed-off-by: Adam C. Emerson <aemerson(a)redhat.com>
----
- src/rgw/rgw_log_backing.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
-index d5996049e5873..6f755efb46389 100644
---- a/src/rgw/rgw_log_backing.h
-+++ b/src/rgw/rgw_log_backing.h
-@@ -251,7 +251,7 @@ cursorgen(std::string_view cursor_) {
-
- inline std::pair<uint64_t, std::string_view>
- cursorgeno(std::optional<std::string_view> cursor) {
-- if (cursor) {
-+ if (cursor && !cursor->empty()) {
- return cursorgen(*cursor);
- } else {
- return { 0, ""s };
diff --git a/ceph.spec b/ceph.spec
index 0d3546f..fcce61c 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -26,7 +26,7 @@
%bcond_with zbd
%bcond_with cmake_verbose_logging
%bcond_without ceph_test_package
-%ifarch s390 s390x
+%ifarch s390
%bcond_with tcmalloc
%else
%bcond_without tcmalloc
@@ -125,7 +125,7 @@
# main package definition
#################################################################################
Name: ceph
-Version: 16.2.1
+Version: 16.2.2
Release: 1%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@@ -146,14 +146,12 @@ Source0: %{?_remote_tarball_prefix}ceph-%{version}.tar.gz
Patch0001: 0001-src-common-crc32c_intel_fast.patch
Patch0002: 0002-src-common-CMakeLists.txt.patch
Patch0003: 0003-src-common-bitstr.h.patch
-Patch0004: 0004-src-CMakeLists.txt.patch
Patch0006: 0006-src-blk-CMakeLists.txt.patch
Patch0007: 0007-src-test-neorados-CMakeLists.txt.patch
Patch0008: 0008-cmake-modules-Finduring.cmake.patch
Patch0009: 0009-librgw-notifications-initialize-kafka-and-amqp.patch
Patch0010: 0010-os-bluestore-strip-trailing-slash-for-directory-list.patch
Patch0011: 0011-src-test-rgw-amqp_mock.cc.patch
-Patch0012: 0012-rgw.patch
# Source1: cmake-modules-BuildBoost.cmake.noautopatch
# ceph 14.0.1 does not support 32-bit architectures, bugs #1727788, #1727787
ExcludeArch: i686 armv7hl
@@ -188,8 +186,12 @@ BuildRequires: gcc-c++
%endif
BuildRequires: gdbm
%if 0%{with tcmalloc}
-%if 0%{?fedora} || 0%{?rhel}
-BuildRequires: gperftools-devel >= 2.6.1
+# libprofiler did not build on ppc64le until 2.7.90
+%if 0%{?fedora} || 0%{?rhel} >= 8
+BuildRequires: gperftools-devel >= 2.7.90
+%endif
+%if 0%{?rhel} && 0%{?rhel} < 8
+BuildRequires: gperftools-devel >= 2.6.1
%endif
%if 0%{?suse_version}
BuildRequires: gperftools-devel >= 2.4
@@ -347,6 +349,7 @@ BuildRequires: lz4-devel >= 1.7
# distro-conditional make check dependencies
%if 0%{with make_check}
%if 0%{?fedora} || 0%{?rhel}
+BuildRequires: golang-github-prometheus
BuildRequires: libtool-ltdl-devel
BuildRequires: xmlsec1
BuildRequires: xmlsec1-devel
@@ -363,6 +366,7 @@ BuildRequires: python%{python3_pkgversion}-werkzeug
BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
%if 0%{?suse_version}
+BuildRequires: golang-github-prometheus-prometheus
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@@ -444,7 +448,7 @@ Requires: python%{python3_pkgversion}-setuptools
Requires: util-linux
Requires: xfsprogs
Requires: which
-%if 0%{?fedora} || 0%{?rhel}
+%if 0%{?rhel} && 0%{?rhel} < 8
# The following is necessary due to tracker 36508 and can be removed once the
# associated upstream bugs are resolved.
%if 0%{with tcmalloc}
@@ -728,6 +732,7 @@ Summary: Ceph daemon for immutable object cache
%if 0%{?suse_version}
Group: System/Filesystems
%endif
+Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
Requires: librados2 = %{_epoch_prefix}%{version}-%{release}
%description immutable-object-cache
Daemon for immutable object cache.
@@ -2494,6 +2499,9 @@ exit 0
%config %{_sysconfdir}/prometheus/ceph/ceph_default_alerts.yml
%changelog
+* Wed May 5 2021 Kaleb S. KEITHLEY <kkeithle[at]redhat.com> - 2:16.2.2-1
+- 16.2.2 GA
+
* Tue Apr 20 2021 Kaleb S. KEITHLEY <kkeithle[at]redhat.com> - 2:16.2.1-1
- 16.2.1 GA
diff --git a/sources b/sources
index 3de5593..adc6360 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (ceph-16.2.1.tar.gz) = 42e404aa41565485dc4c08b66efb3de719adee72cfc535586992e15ab4601bfbd909ef61abf412badbbfa7ef13bf91a7c0576fbbf3f9f687bc7b465b21a8c416
+SHA512 (ceph-16.2.2.tar.gz) = 4269fe07821400655c5cfcc2828b4a39a0adc81ef0045d86b50b539c13c151c9ace9710fe365bbf2de7e5134e03c45c88f19f4adcb7c4d608eab4b9580da113c
commit 78e076b5afe9c86d02d40e39be312d9c7e2c7bab
Author: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
Date: Tue Apr 20 08:34:14 2021 -0400
16.2.1 GA
Signed-off-by: Kaleb S. KEITHLEY <kkeithle(a)redhat.com>
diff --git a/ceph.spec b/ceph.spec
index 5b81d7f..0d3546f 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -125,8 +125,8 @@
# main package definition
#################################################################################
Name: ceph
-Version: 16.2.0
-Release: 3%{?dist}
+Version: 16.2.1
+Release: 1%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
%endif
@@ -154,7 +154,7 @@ Patch0009: 0009-librgw-notifications-initialize-kafka-and-amqp.patch
Patch0010: 0010-os-bluestore-strip-trailing-slash-for-directory-list.patch
Patch0011: 0011-src-test-rgw-amqp_mock.cc.patch
Patch0012: 0012-rgw.patch
-Source1: cmake-modules-BuildBoost.cmake.noautopatch
+# Source1: cmake-modules-BuildBoost.cmake.noautopatch
# ceph 14.0.1 does not support 32-bit architectures, bugs #1727788, #1727787
ExcludeArch: i686 armv7hl
%if 0%{?suse_version}
@@ -1224,9 +1224,9 @@ This package provides Ceph default alerts for Prometheus.
#################################################################################
%prep
%autosetup -p1
-%ifarch x86_64
-patch -p1 < %{SOURCE1}
-%endif
+# %%ifarch x86_64
+# patch -p1 < %{SOURCE1}
+# %%endif
%build
# LTO can be enabled as soon as the following GCC bug is fixed:
@@ -2494,6 +2494,9 @@ exit 0
%config %{_sysconfdir}/prometheus/ceph/ceph_default_alerts.yml
%changelog
+* Tue Apr 20 2021 Kaleb S. KEITHLEY <kkeithle[at]redhat.com> - 2:16.2.1-1
+- 16.2.1 GA
+
* Wed Apr 14 2021 Richard W.M. Jones <rjones(a)redhat.com> - 2:16.2.0-3
- Rebuild for updated liburing.
diff --git a/sources b/sources
index dbd8960..3de5593 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (ceph-16.2.0.tar.gz) = aeb9a91c33221c64ea24603dc88cab346bf0abdf0d41ff85c2e1cf134130737ec1fab1246be0a2fa3af8a655ae1dabb69688855f229f54438e60cb8098175b8e
+SHA512 (ceph-16.2.1.tar.gz) = 42e404aa41565485dc4c08b66efb3de719adee72cfc535586992e15ab4601bfbd909ef61abf412badbbfa7ef13bf91a7c0576fbbf3f9f687bc7b465b21a8c416