Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ set(ICEBERG_SOURCES
inheritable_metadata.cc
json_serde.cc
location_provider.cc
logging/logger.cc
manifest/manifest_adapter.cc
manifest/manifest_entry.cc
manifest/manifest_filter_manager.cc
Expand Down
146 changes: 146 additions & 0 deletions src/iceberg/logging/logger.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/logging/logger.h"

#include <atomic>
#include <cstdint>
#include <memory>
#include <mutex>
#include <utility>

namespace iceberg {

namespace {

/// \brief Logger that drops every record.
class NoopLogger final : public Logger {
public:
bool ShouldLog(LogLevel /*level*/) const override { return false; }
void Log(LogMessage&& /*message*/) noexcept override {}
void SetLevel(LogLevel /*level*/) override {}
LogLevel level() const override { return LogLevel::kOff; }
bool IsNoop() const override { return true; }
};

/// \brief Construct the process default logger for this build configuration.
///
/// This block ships only the interface and the no-op logger; the concrete
/// std::cerr and spdlog sinks (and the build-config selection between them)
/// arrive in later blocks, which update this factory.
std::shared_ptr<Logger> MakeDefaultLogger() { return std::make_shared<NoopLogger>(); }

/// \brief The process-global default-logger slot.
struct DefaultSlot {
std::mutex mtx;
std::shared_ptr<Logger> logger;
// Seeded to 1 so a fresh thread (tls_gen == 0) always refreshes on first use.
std::atomic<uint64_t> gen{1};

DefaultSlot() : logger(MakeDefaultLogger()) {}
};

/// \brief Immortal (leaked, hence reachable -> LSan-clean) accessor for the slot.
DefaultSlot& Slot() {
static auto* slot = new DefaultSlot();
return *slot;
}

} // namespace

std::shared_ptr<Logger> Logger::Noop() {
// Intentionally leaked: reachable via the function-local static (LSan-clean)
// and never destroyed, so logging during static teardown stays safe.
static auto* instance = new std::shared_ptr<Logger>(std::make_shared<NoopLogger>());
return *instance;
}

std::shared_ptr<Logger> GetDefaultLogger() {
DefaultSlot& slot = Slot();
std::lock_guard<std::mutex> lock(slot.mtx);
return slot.logger;
}

void SetDefaultLogger(std::shared_ptr<Logger> logger) {
if (!logger) {
logger = Logger::Noop();
}
DefaultSlot& slot = Slot();
std::lock_guard<std::mutex> lock(slot.mtx);
slot.logger = std::move(logger);
// Publish the swap; the mutex provides the happens-before, gen is a detector.
slot.gen.fetch_add(1, std::memory_order_relaxed);
}

void SetDefaultLevel(LogLevel level) {
DefaultSlot& slot = Slot();
std::lock_guard<std::mutex> lock(slot.mtx);
slot.logger->SetLevel(level);
}

namespace detail {

const std::shared_ptr<Logger>& CurrentLogger() noexcept {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is still not safe for thread_local teardown.

One case: a thread creates another thread_local object first. Then it calls CurrentLogger() in normal code. At thread exit, alive is destroyed before that earlier object. If that earlier object logs in its destructor, we read alive.value after alive is already destroyed. This is UB.

The test only checks first use from a TLS destructor. Maybe we need a design without destructible TLS state, or leak the per-thread cache.

static thread_local std::shared_ptr<Logger> tls;
static thread_local uint64_t tls_gen = 0;
// Sentinel whose destructor marks the cache dead at thread exit. It is
// declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once
// dead, a log from any later-destroyed thread_local destructor must not touch
// the (about-to-be / already) destroyed tls slot.
static thread_local struct AliveFlag {
bool value = true;
~AliveFlag() { value = false; }
} alive;
if (!alive.value) {
// Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger
// (leaked, never destroyed) so logging during teardown stays safe.
static const std::shared_ptr<Logger> kFallback = Logger::Noop();
return kFallback;
}
DefaultSlot& slot = Slot();
uint64_t current = slot.gen.load(std::memory_order_relaxed);
if (current != tls_gen) {
std::lock_guard<std::mutex> lock(slot.mtx);
tls = slot.logger;
tls_gen = current;
}
return tls;
}

void Emit(Logger& logger, LogLevel level, const std::source_location& location,
std::string&& message) {
logger.Log(LogMessage{.level = level,
.message = std::move(message),
.location = location,
.attributes = {}});
}

void EmitFormatError(Logger& logger, LogLevel level,
const std::source_location& location) noexcept {
// Fixed short literal (<= 15 bytes, fits SSO on libstdc++/libc++/MSVC -> no heap
// allocation), no std::format, no retry. Cannot throw or recurse.
logger.Log(LogMessage{.level = level,
.message = std::string("<fmt error>"),
.location = location,
.attributes = {}});
}

} // namespace detail

} // namespace iceberg
213 changes: 213 additions & 0 deletions src/iceberg/logging/logger.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/logging/logger.h
/// \brief Pluggable logging interface, the process-global default logger, and

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small doc issue. This header says it has logging macros, but this PR does not add them.

The PR text says macros are in a later block. Users who read the installed header may be confused.

Maybe say this header has the logger API and macro helper hooks. Or move this sentence to the later PR.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

/// the logging macros.
///
/// This header is backend-agnostic: it never includes the build-generated
/// backend configuration header and never references the spdlog feature macro,
/// so consumers see one stable API regardless of how the backend was configured.

#include <cstdlib>
#include <format>
#include <memory>
#include <source_location>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include "iceberg/iceberg_export.h"
#include "iceberg/logging/log_level.h"
#include "iceberg/result.h"

namespace iceberg {

/// \brief A structured key/value attribute attached to a log record.
///
/// Both key and value are owned so a sink may retain the record safely.
/// Unused in v1; reserved so structured logging can be added without an ABI
/// break to LogMessage.
struct LogAttribute {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add ICEBERG_EXPORT to this and below?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

std::string key;
std::string value;
};

/// \brief A single log record handed to a Logger.
///
/// The formatted message is owned (moved in by the logging macros), so a sink
/// may safely retain the record beyond the Log() call. The member set must not
/// depend on the build's logging backend (the spdlog backend never appears here).
struct LogMessage {
LogLevel level = LogLevel::kOff;
std::string message;
std::source_location location = std::source_location::current();
std::vector<LogAttribute> attributes;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the attributes field is useful, but now there is no easy API to fill it.

For engine loggers, structured fields are often important. For example query id, task id, table name, snapshot id, file path, etc. If the first public API only supports message text in practice, it may be harder to add good structured logging later.

Can we add a small helper or builder path for attributes, or at least define how future macros will pass them?

};

/// \brief Well-known Logger::Initialize() property keys.
///
/// `level` is honored by the base Logger::Initialize (parsed via
/// LogLevelFromString). `pattern` is honored by the formatting sinks
/// (CerrLogger, SpdLogger).
inline constexpr std::string_view kLevelProperty = "level";
inline constexpr std::string_view kPatternProperty = "pattern";

/// \brief Pluggable logging sink.
///
/// ShouldLog() is the single authority for runtime filtering -- the macros call
/// it on every (compile-time-enabled) statement, so level changes by any path
/// take effect immediately. Implementations must be thread-safe and must not
/// throw. They must also obey:
/// - No reentrancy: Log()/Flush() must not call the logging macros or
/// GetDefaultLogger() (UB -- deadlock with mutex-based sinks).
/// - level() is an accessor consistent with ShouldLog (used by SetDefaultLevel
/// and introspection); ShouldLog may implement finer logic than a level compare.
class ICEBERG_EXPORT Logger {
public:
virtual ~Logger() = default;

/// \brief Property-based setup, called by Loggers::Load() before first use.
///
/// The base implementation applies the "level" property (parsed via
/// LogLevelFromString); an unrecognized value is an InvalidArgument error.
/// Formatting sinks override this to also apply "pattern" and then delegate
/// to this base for "level".
virtual Status Initialize(
const std::unordered_map<std::string, std::string>& properties) {
if (auto it = properties.find(std::string(kLevelProperty)); it != properties.end()) {
auto parsed = LogLevelFromString(it->second);
if (!parsed) return std::unexpected(parsed.error());
SetLevel(*parsed);
}
return {};
}

/// \brief Cheap check whether a record at \p level would be emitted.
virtual bool ShouldLog(LogLevel level) const = 0;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc says logger implementations must not throw. ShouldLog() is also in the hot path.

But the virtual method is not noexcept. A custom engine logger may throw here by mistake. Then logging can throw from the filter path. Adding noexcept later is also an API/ABI change.

Can we mark ShouldLog() noexcept now? Maybe SetLevel() and level() too, if we want the same rule for all logger methods.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


/// \brief Emit one (already-formatted) record, taking ownership. Must not throw.
virtual void Log(LogMessage&& message) noexcept = 0;

/// \brief Set the minimum level this logger emits.
virtual void SetLevel(LogLevel level) = 0;

/// \brief Return the minimum level this logger emits.
virtual LogLevel level() const = 0;

/// \brief Flush any buffered output. Must not throw; best-effort on the fatal path.
virtual void Flush() noexcept {}

/// \brief Return true if this logger is a no-op.
virtual bool IsNoop() const { return false; }

/// \brief Return a shared, immortal no-op logger singleton.
static std::shared_ptr<Logger> Noop();
};

/// \brief Return the process-global default logger (never null).
///
/// Off the hot path -- acquires the slot lock and returns an owning copy. The
/// logging macros use the cheaper internal hot-path accessor instead.
ICEBERG_EXPORT std::shared_ptr<Logger> GetDefaultLogger();

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think only having a process-global default logger may be hard for engine integration.

In engines, users may want different loggers for different catalog, session, query, or task. If all code only reads one global logger, it is hard to route logs to the engine logging context.

Can we consider a way to pass or bind a logger from a higher level context? The global logger can still be the fallback.


/// \brief Install a new process-global default logger.
///
/// A null argument installs the no-op logger. Thread-safe; intended for
/// occasional (configuration-time) use rather than the hot path.
ICEBERG_EXPORT void SetDefaultLogger(std::shared_ptr<Logger> logger);

/// \brief Set the minimum level of the current default logger.
///
/// Convenience for `GetDefaultLogger()->SetLevel(level)`. Filtering is always
/// decided by the logger's own ShouldLog(), so changing a logger's level by any
/// means (this, SetLevel on a held handle, or Initialize) takes effect immediately.
ICEBERG_EXPORT void SetDefaultLevel(LogLevel level);

// ---------------------------------------------------------------------------
// Using the API directly (the LOG_* macros that wrap this are added later in
// the stack). Example: a custom sink, installed as the process default.
//
// class MySink : public Logger {
// public:
// bool ShouldLog(LogLevel level) const override { return level >= level_; }
// void Log(LogMessage&& m) noexcept override { write_line(m.message); }
// void SetLevel(LogLevel level) override { level_ = level; }
// LogLevel level() const override { return level_; }
// private:
// std::atomic<LogLevel> level_{LogLevel::kInfo};
// };
//
// SetDefaultLogger(std::make_shared<MySink>()); // install process-wide
// SetDefaultLevel(LogLevel::kDebug); // adjust the threshold
//
// auto logger = GetDefaultLogger(); // borrow the current default
// if (logger->ShouldLog(LogLevel::kInfo)) {
// logger->Log(LogMessage{.level = LogLevel::kInfo, .message = "scan ready"});
// }
//
// // Or configure from catalog-style properties (applies the "level" key):
// auto sink = std::make_shared<MySink>();
// auto status = sink->Initialize({{std::string(kLevelProperty), "warn"}}); // -> kWarn
// ---------------------------------------------------------------------------

namespace detail {

/// \brief Hot-path accessor for the default logger.
///
/// Returns a reference to a thread-local cached shared_ptr that is refreshed
/// only when the default logger has changed (no lock / no refcount churn in
/// steady state). The reference is valid for the duration of the calling
/// statement.
ICEBERG_EXPORT const std::shared_ptr<Logger>& CurrentLogger() noexcept;

/// \brief Build a LogMessage from the already-formatted text and dispatch it.
///
/// Declared ICEBERG_EXPORT because the logging macros expand into this call in
/// consumer translation units.
ICEBERG_EXPORT void Emit(Logger& logger, LogLevel level,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should keep a lazy format-style API here.

Now Emit takes an already formatted std::string. This means the string must be built before calling the sink. It is OK after ShouldLog(), but it still makes the API string-based. Some engine loggers may want to keep format string + args, or format only inside their own backend.

Can we provide an API closer to std::format style, like log(level, fmt, args...), and only build the string after the level check? This also makes normal use easier for callers.

const std::source_location& location, std::string&& message);

/// \brief Emit a fixed fallback record when formatting threw.
///
/// noexcept, allocation-light (small/SSO literal), performs no std::format, and
/// does not recurse -- so the macro's "logging never throws" guarantee holds
/// even when a format argument throws.
ICEBERG_EXPORT void EmitFormatError(Logger& logger, LogLevel level,
const std::source_location& location) noexcept;

/// \brief Runtime (non-literal) format-string helper.
///
/// std::format requires a compile-time format string; this routes a runtime
/// string through std::vformat. Args are bound as named lvalues and the
/// arg-store is held in a named variable so it outlives the vformat call
/// (C++23 make_format_args rejects rvalues -- P2905 / LWG3631).
template <typename... Args>
std::string VFormat(std::string_view fmt, Args&&... args) {
auto store = std::make_format_args(args...);
return std::vformat(fmt, store);
}

} // namespace detail

} // namespace iceberg
2 changes: 1 addition & 1 deletion src/iceberg/logging/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
# specific language governing permissions and limitations
# under the License.

install_headers(['log_level.h'], subdir: 'iceberg/logging')
install_headers(['log_level.h', 'logger.h'], subdir: 'iceberg/logging')
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ iceberg_sources = files(
'inspect/snapshots_table.cc',
'json_serde.cc',
'location_provider.cc',
'logging/logger.cc',
'manifest/manifest_adapter.cc',
'manifest/manifest_entry.cc',
'manifest/manifest_filter_manager.cc',
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ add_iceberg_test(table_test
table_test.cc
table_update_test.cc)

add_iceberg_test(logging_test SOURCES log_level_test.cc)
add_iceberg_test(logging_test SOURCES log_level_test.cc logger_test.cc)

add_iceberg_test(expression_test
SOURCES
Expand Down
Loading
Loading