diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 18615c202..93a3a6004 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -47,6 +47,7 @@ set(ICEBERG_SOURCES inheritable_metadata.cc json_serde.cc location_provider.cc + logging/logger.cc manifest/manifest_adapter.cc manifest/manifest_entry.cc manifest/manifest_filter_manager.cc diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc new file mode 100644 index 000000000..cd864bc42 --- /dev/null +++ b/src/iceberg/logging/logger.cc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include +#include + +namespace iceberg { + +namespace { + +/// \brief Logger that drops every record. +class NoopLogger final : public Logger { + public: + bool ShouldLog(LogLevel /*level*/) const override { return false; } + void Log(LogMessage&& /*message*/) noexcept override {} + void SetLevel(LogLevel /*level*/) override {} + LogLevel level() const override { return LogLevel::kOff; } + bool IsNoop() const override { return true; } +}; + +/// \brief Construct the process default logger for this build configuration. +/// +/// This block ships only the interface and the no-op logger; the concrete +/// std::cerr and spdlog sinks (and the build-config selection between them) +/// arrive in later blocks, which update this factory. +std::shared_ptr MakeDefaultLogger() { return std::make_shared(); } + +/// \brief The process-global default-logger slot. +struct DefaultSlot { + std::mutex mtx; + std::shared_ptr logger; + // Seeded to 1 so a fresh thread (tls_gen == 0) always refreshes on first use. + std::atomic gen{1}; + + DefaultSlot() : logger(MakeDefaultLogger()) {} +}; + +/// \brief Immortal (leaked, hence reachable -> LSan-clean) accessor for the slot. +DefaultSlot& Slot() { + static auto* slot = new DefaultSlot(); + return *slot; +} + +} // namespace + +std::shared_ptr Logger::Noop() { + // Intentionally leaked: reachable via the function-local static (LSan-clean) + // and never destroyed, so logging during static teardown stays safe. + static auto* instance = new std::shared_ptr(std::make_shared()); + return *instance; +} + +std::shared_ptr GetDefaultLogger() { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + return slot.logger; +} + +void SetDefaultLogger(std::shared_ptr logger) { + if (!logger) { + logger = Logger::Noop(); + } + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger = std::move(logger); + // Publish the swap; the mutex provides the happens-before, gen is a detector. + slot.gen.fetch_add(1, std::memory_order_relaxed); +} + +void SetDefaultLevel(LogLevel level) { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger->SetLevel(level); +} + +namespace detail { + +const std::shared_ptr& CurrentLogger() noexcept { + static thread_local std::shared_ptr tls; + static thread_local uint64_t tls_gen = 0; + // Sentinel whose destructor marks the cache dead at thread exit. It is + // declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once + // dead, a log from any later-destroyed thread_local destructor must not touch + // the (about-to-be / already) destroyed tls slot. + static thread_local struct AliveFlag { + bool value = true; + ~AliveFlag() { value = false; } + } alive; + if (!alive.value) { + // Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger + // (leaked, never destroyed) so logging during teardown stays safe. + static const std::shared_ptr kFallback = Logger::Noop(); + return kFallback; + } + DefaultSlot& slot = Slot(); + uint64_t current = slot.gen.load(std::memory_order_relaxed); + if (current != tls_gen) { + std::lock_guard lock(slot.mtx); + tls = slot.logger; + tls_gen = current; + } + return tls; +} + +void Emit(Logger& logger, LogLevel level, const std::source_location& location, + std::string&& message) { + logger.Log(LogMessage{.level = level, + .message = std::move(message), + .location = location, + .attributes = {}}); +} + +void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept { + // Fixed short literal (<= 15 bytes, fits SSO on libstdc++/libc++/MSVC -> no heap + // allocation), no std::format, no retry. Cannot throw or recurse. + logger.Log(LogMessage{.level = level, + .message = std::string(""), + .location = location, + .attributes = {}}); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h new file mode 100644 index 000000000..58f5691af --- /dev/null +++ b/src/iceberg/logging/logger.h @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/logging/logger.h +/// \brief Pluggable logging interface, the process-global default logger, and +/// the logging macros. +/// +/// This header is backend-agnostic: it never includes the build-generated +/// backend configuration header and never references the spdlog feature macro, +/// so consumers see one stable API regardless of how the backend was configured. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/logging/log_level.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief A structured key/value attribute attached to a log record. +/// +/// Both key and value are owned so a sink may retain the record safely. +/// Unused in v1; reserved so structured logging can be added without an ABI +/// break to LogMessage. +struct LogAttribute { + std::string key; + std::string value; +}; + +/// \brief A single log record handed to a Logger. +/// +/// The formatted message is owned (moved in by the logging macros), so a sink +/// may safely retain the record beyond the Log() call. The member set must not +/// depend on the build's logging backend (the spdlog backend never appears here). +struct LogMessage { + LogLevel level = LogLevel::kOff; + std::string message; + std::source_location location = std::source_location::current(); + std::vector attributes; +}; + +/// \brief Well-known Logger::Initialize() property keys. +/// +/// `level` is honored by the base Logger::Initialize (parsed via +/// LogLevelFromString). `pattern` is honored by the formatting sinks +/// (CerrLogger, SpdLogger). +inline constexpr std::string_view kLevelProperty = "level"; +inline constexpr std::string_view kPatternProperty = "pattern"; + +/// \brief Pluggable logging sink. +/// +/// ShouldLog() is the single authority for runtime filtering -- the macros call +/// it on every (compile-time-enabled) statement, so level changes by any path +/// take effect immediately. Implementations must be thread-safe and must not +/// throw. They must also obey: +/// - No reentrancy: Log()/Flush() must not call the logging macros or +/// GetDefaultLogger() (UB -- deadlock with mutex-based sinks). +/// - level() is an accessor consistent with ShouldLog (used by SetDefaultLevel +/// and introspection); ShouldLog may implement finer logic than a level compare. +class ICEBERG_EXPORT Logger { + public: + virtual ~Logger() = default; + + /// \brief Property-based setup, called by Loggers::Load() before first use. + /// + /// The base implementation applies the "level" property (parsed via + /// LogLevelFromString); an unrecognized value is an InvalidArgument error. + /// Formatting sinks override this to also apply "pattern" and then delegate + /// to this base for "level". + virtual Status Initialize( + const std::unordered_map& properties) { + if (auto it = properties.find(std::string(kLevelProperty)); it != properties.end()) { + auto parsed = LogLevelFromString(it->second); + if (!parsed) return std::unexpected(parsed.error()); + SetLevel(*parsed); + } + return {}; + } + + /// \brief Cheap check whether a record at \p level would be emitted. + virtual bool ShouldLog(LogLevel level) const = 0; + + /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. + virtual void Log(LogMessage&& message) noexcept = 0; + + /// \brief Set the minimum level this logger emits. + virtual void SetLevel(LogLevel level) = 0; + + /// \brief Return the minimum level this logger emits. + virtual LogLevel level() const = 0; + + /// \brief Flush any buffered output. Must not throw; best-effort on the fatal path. + virtual void Flush() noexcept {} + + /// \brief Return true if this logger is a no-op. + virtual bool IsNoop() const { return false; } + + /// \brief Return a shared, immortal no-op logger singleton. + static std::shared_ptr Noop(); +}; + +/// \brief Return the process-global default logger (never null). +/// +/// Off the hot path -- acquires the slot lock and returns an owning copy. The +/// logging macros use the cheaper internal hot-path accessor instead. +ICEBERG_EXPORT std::shared_ptr GetDefaultLogger(); + +/// \brief Install a new process-global default logger. +/// +/// A null argument installs the no-op logger. Thread-safe; intended for +/// occasional (configuration-time) use rather than the hot path. +ICEBERG_EXPORT void SetDefaultLogger(std::shared_ptr logger); + +/// \brief Set the minimum level of the current default logger. +/// +/// Convenience for `GetDefaultLogger()->SetLevel(level)`. Filtering is always +/// decided by the logger's own ShouldLog(), so changing a logger's level by any +/// means (this, SetLevel on a held handle, or Initialize) takes effect immediately. +ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); + +// --------------------------------------------------------------------------- +// Using the API directly (the LOG_* macros that wrap this are added later in +// the stack). Example: a custom sink, installed as the process default. +// +// class MySink : public Logger { +// public: +// bool ShouldLog(LogLevel level) const override { return level >= level_; } +// void Log(LogMessage&& m) noexcept override { write_line(m.message); } +// void SetLevel(LogLevel level) override { level_ = level; } +// LogLevel level() const override { return level_; } +// private: +// std::atomic level_{LogLevel::kInfo}; +// }; +// +// SetDefaultLogger(std::make_shared()); // install process-wide +// SetDefaultLevel(LogLevel::kDebug); // adjust the threshold +// +// auto logger = GetDefaultLogger(); // borrow the current default +// if (logger->ShouldLog(LogLevel::kInfo)) { +// logger->Log(LogMessage{.level = LogLevel::kInfo, .message = "scan ready"}); +// } +// +// // Or configure from catalog-style properties (applies the "level" key): +// auto sink = std::make_shared(); +// auto status = sink->Initialize({{std::string(kLevelProperty), "warn"}}); // -> kWarn +// --------------------------------------------------------------------------- + +namespace detail { + +/// \brief Hot-path accessor for the default logger. +/// +/// Returns a reference to a thread-local cached shared_ptr that is refreshed +/// only when the default logger has changed (no lock / no refcount churn in +/// steady state). The reference is valid for the duration of the calling +/// statement. +ICEBERG_EXPORT const std::shared_ptr& CurrentLogger() noexcept; + +/// \brief Build a LogMessage from the already-formatted text and dispatch it. +/// +/// Declared ICEBERG_EXPORT because the logging macros expand into this call in +/// consumer translation units. +ICEBERG_EXPORT void Emit(Logger& logger, LogLevel level, + const std::source_location& location, std::string&& message); + +/// \brief Emit a fixed fallback record when formatting threw. +/// +/// noexcept, allocation-light (small/SSO literal), performs no std::format, and +/// does not recurse -- so the macro's "logging never throws" guarantee holds +/// even when a format argument throws. +ICEBERG_EXPORT void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept; + +/// \brief Runtime (non-literal) format-string helper. +/// +/// std::format requires a compile-time format string; this routes a runtime +/// string through std::vformat. Args are bound as named lvalues and the +/// arg-store is held in a named variable so it outlives the vformat call +/// (C++23 make_format_args rejects rvalues -- P2905 / LWG3631). +template +std::string VFormat(std::string_view fmt, Args&&... args) { + auto store = std::make_format_args(args...); + return std::vformat(fmt, store); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/meson.build b/src/iceberg/logging/meson.build index 3c286a196..3f7af4fec 100644 --- a/src/iceberg/logging/meson.build +++ b/src/iceberg/logging/meson.build @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -install_headers(['log_level.h'], subdir: 'iceberg/logging') +install_headers(['log_level.h', 'logger.h'], subdir: 'iceberg/logging') diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 7a6ba4f8b..a0a0be610 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -72,6 +72,7 @@ iceberg_sources = files( 'inspect/snapshots_table.cc', 'json_serde.cc', 'location_provider.cc', + 'logging/logger.cc', 'manifest/manifest_adapter.cc', 'manifest/manifest_entry.cc', 'manifest/manifest_filter_manager.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 0a7db5ed2..16575c161 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -102,7 +102,7 @@ add_iceberg_test(table_test table_test.cc table_update_test.cc) -add_iceberg_test(logging_test SOURCES log_level_test.cc) +add_iceberg_test(logging_test SOURCES log_level_test.cc logger_test.cc) add_iceberg_test(expression_test SOURCES diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc new file mode 100644 index 000000000..ed1c88aa5 --- /dev/null +++ b/src/iceberg/test/logger_test.cc @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include + +#include + +#include "iceberg/logging/log_level.h" +#include "iceberg/test/logging_test_helpers.h" + +namespace iceberg { + +TEST(LoggerTest, NoopIsSharedImmortalAndSilent) { + auto noop = Logger::Noop(); + ASSERT_NE(noop, nullptr); + EXPECT_TRUE(noop->IsNoop()); + EXPECT_FALSE(noop->ShouldLog(LogLevel::kFatal)); + EXPECT_EQ(noop->level(), LogLevel::kOff); + // Same singleton instance every call. + EXPECT_EQ(noop.get(), Logger::Noop().get()); +} + +TEST(LoggerTest, DefaultLoggerIsNeverNull) { EXPECT_NE(GetDefaultLogger(), nullptr); } + +TEST(LoggerTest, SetAndGetDefaultLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + EXPECT_EQ(GetDefaultLogger().get(), capturing.get()); + EXPECT_EQ(detail::CurrentLogger().get(), capturing.get()); +} + +TEST(LoggerTest, SetNullFallsBackToNoop) { + ScopedDefaultLogger guard(std::make_shared()); + SetDefaultLogger(nullptr); + EXPECT_TRUE(GetDefaultLogger()->IsNoop()); +} + +TEST(LoggerTest, CurrentLoggerTracksSwaps) { + auto first = std::make_shared(); + auto second = std::make_shared(); + ScopedDefaultLogger guard(first); + EXPECT_EQ(detail::CurrentLogger().get(), first.get()); + SetDefaultLogger(second); + // Generation bump must invalidate the thread-local cache. + EXPECT_EQ(detail::CurrentLogger().get(), second.get()); +} + +TEST(LoggerTest, SetDefaultLevelUpdatesLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + SetDefaultLevel(LogLevel::kError); + EXPECT_EQ(capturing->level(), LogLevel::kError); +} + +// Filtering is decided by the logger's own ShouldLog (no separate cached gate), +// so lowering a logger's level out-of-band (not via SetDefaultLevel) takes effect +// immediately -- this is the regression guard for the dropped g_effective_level gate. +TEST(LoggerTest, OutOfBandLevelLoweringTakesEffect) { + auto capturing = std::make_shared(); + capturing->SetLevel(LogLevel::kError); + ScopedDefaultLogger guard(capturing); + EXPECT_FALSE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); + capturing->SetLevel(LogLevel::kTrace); // lowered directly on the handle + EXPECT_TRUE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); +} + +TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { + // Stress CurrentLogger()/GetDefaultLogger() against SetDefaultLogger() swaps. + // Run under TSan in CI; here it asserts no crash and a valid logger throughout. + auto a = std::make_shared(); + auto b = std::make_shared(); + ScopedDefaultLogger guard(a); + std::atomic stop{false}; + std::atomic saw_null{false}; + std::vector readers; + for (int i = 0; i < 6; ++i) { + readers.emplace_back([&stop, &saw_null] { + // ASSERT_* doesn't propagate from non-main threads; record via a flag. + while (!stop.load(std::memory_order_relaxed)) { + const auto& l = detail::CurrentLogger(); + if (!l) saw_null.store(true, std::memory_order_relaxed); + (void)l->ShouldLog(LogLevel::kError); + (void)GetDefaultLogger(); + } + }); + } + for (int i = 0; i < 2000; ++i) SetDefaultLogger((i & 1) ? a : b); + stop.store(true, std::memory_order_relaxed); + for (auto& t : readers) t.join(); + EXPECT_FALSE(saw_null.load()); // CurrentLogger() is never null across swaps +} + +TEST(LoggerTest, InitializeAppliesLevelProperty) { + CapturingLogger logger; + auto status = logger.Initialize({{std::string(kLevelProperty), std::string("error")}}); + ASSERT_TRUE(status.has_value()); + EXPECT_EQ(logger.level(), LogLevel::kError); +} + +TEST(LoggerTest, InitializeRejectsInvalidLevel) { + CapturingLogger logger; + auto status = + logger.Initialize({{std::string(kLevelProperty), std::string("not-a-level")}}); + ASSERT_FALSE(status.has_value()); + EXPECT_EQ(status.error().kind, ErrorKind::kInvalidArgument); +} + +// Logging during thread teardown (from a thread_local destructor) must not crash: +// CurrentLogger() serves the live cache or falls back to an immortal logger. Run +// under ASan in CI for full signal. +TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { + std::thread([] { + struct Probe { + ~Probe() { + const auto& logger = detail::CurrentLogger(); + if (logger) { + detail::Emit(*logger, LogLevel::kInfo, std::source_location::current(), + "from thread_local dtor"); + } + } + }; + static thread_local Probe probe; + (void)probe; + }).join(); + SUCCEED(); +} + +} // namespace iceberg diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h new file mode 100644 index 000000000..00a57cf82 --- /dev/null +++ b/src/iceberg/test/logging_test_helpers.h @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/logging/logger.h" + +namespace iceberg { + +/// \brief Test sink that records every emitted LogMessage under a mutex. +class CapturingLogger : public Logger { + public: + bool ShouldLog(LogLevel level) const override { return level >= level_; } + + void Log(LogMessage&& message) noexcept override { + std::lock_guard lock(mutex_); + records_.push_back(std::move(message)); + } + + void SetLevel(LogLevel level) override { level_ = level; } + LogLevel level() const override { return level_; } + + std::vector records() const { + std::lock_guard lock(mutex_); + return records_; + } + + std::size_t count() const { + std::lock_guard lock(mutex_); + return records_.size(); + } + + private: + mutable std::mutex mutex_; + LogLevel level_ = LogLevel::kTrace; + std::vector records_; +}; + +/// \brief RAII guard that restores the process default logger on scope exit, so +/// tests that swap the global default don't leak state into other tests. +class ScopedDefaultLogger { + public: + explicit ScopedDefaultLogger(std::shared_ptr logger) + : previous_(GetDefaultLogger()) { + SetDefaultLogger(std::move(logger)); + } + ~ScopedDefaultLogger() { SetDefaultLogger(previous_); } + + ScopedDefaultLogger(const ScopedDefaultLogger&) = delete; + ScopedDefaultLogger& operator=(const ScopedDefaultLogger&) = delete; + + private: + std::shared_ptr previous_; +}; + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index a76a15553..bd19c20f2 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -61,7 +61,7 @@ iceberg_tests = { 'table_update_test.cc', ), }, - 'logging_test': {'sources': files('log_level_test.cc')}, + 'logging_test': {'sources': files('log_level_test.cc', 'logger_test.cc')}, 'expression_test': { 'sources': files( 'aggregate_test.cc',