Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/iceberg/manifest/manifest_reader.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -681,7 +681,19 @@
const std::vector<std::string>& columns,
bool case_sensitive) {
if (!columns.empty()) {
return schema->Select(columns, case_sensitive);
if (std::ranges::contains(columns, Schema::kAllColumns)) {
return schema->Select(columns, case_sensitive);
}

std::vector<std::string> existing_columns;
existing_columns.reserve(columns.size());
for (const auto& column : columns) {
ICEBERG_ASSIGN_OR_RAISE(auto field, schema->FindFieldByName(column, case_sensitive));
if (field.has_value()) {
existing_columns.push_back(column);
}
}
return schema->Select(existing_columns, case_sensitive);
}
return schema;
}
Expand Down
5 changes: 3 additions & 2 deletions src/iceberg/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,10 @@ Result<std::unique_ptr<Schema>> Schema::Select(std::span<const std::string> name
std::unordered_set<int32_t> selected_ids;
for (const auto& name : names) {
ICEBERG_ASSIGN_OR_RAISE(auto result, FindFieldByName(name, case_sensitive));
if (result.has_value()) {
selected_ids.insert(result.value().get().field_id());
if (!result.has_value()) {
return InvalidArgument("Cannot find selected column: {}", name);
}
selected_ids.insert(result.value().get().field_id());
}

PruneColumnVisitor visitor(selected_ids, /*select_full_types=*/true);
Expand Down
3 changes: 2 additions & 1 deletion src/iceberg/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ class ICEBERG_EXPORT Schema : public StructType {
///
/// \param names Selected field names and nested names are dot-concatenated.
/// \param case_sensitive Whether name matching is case-sensitive (default: true).
/// \return Projected schema containing only selected fields.
/// \return Projected schema containing only selected fields, or an error if any
/// requested field cannot be found.
/// \note If the field name of a nested type has been selected, all of its
/// sub-fields will be selected.
Result<std::unique_ptr<Schema>> Select(std::span<const std::string> names,
Expand Down
5 changes: 5 additions & 0 deletions src/iceberg/table_scan.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -413,6 +413,11 @@
ICEBERG_RETURN_UNEXPECTED(context_.Validate());

ICEBERG_ASSIGN_OR_RAISE(auto schema, ResolveSnapshotSchema());
if (!context_.selected_columns.empty()) {
ICEBERG_ASSIGN_OR_RAISE(
std::ignore,
schema.get()->Select(context_.selected_columns, context_.case_sensitive));
}
return ScanType::Make(metadata_, schema.get(), io_, std::move(context_));
}

Expand Down
14 changes: 12 additions & 2 deletions src/iceberg/test/schema_test.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -816,13 +816,15 @@
.create_schema = []() { return BasicSchema(); },
.select_fields = {"nonexistent"},
.expected_schema = []() { return MakeSchema(); },
.should_succeed = true},
.should_succeed = false,
.expected_error_message = "Cannot find selected column: nonexistent"},

SelectTestParam{.test_name = "SelectCaseSensitive",
.create_schema = []() { return BasicSchema(); },
.select_fields = {"Name"}, // case-sensitive
.expected_schema = []() { return MakeSchema(); },
.should_succeed = true},
.should_succeed = false,
.expected_error_message = "Cannot find selected column: Name"},

SelectTestParam{.test_name = "SelectCaseInsensitive",
.create_schema = []() { return BasicSchema(); },
Expand All @@ -840,6 +842,14 @@
.expected_schema = []() { return MakeSchema(Id(), Name()); },
.should_succeed = true},

SelectTestParam{
.test_name = "SelectPartialMissingField",
.create_schema = []() { return AddressSchema(); },
.select_fields = {"id", "typo"},
.expected_schema = []() { return MakeSchema(); },
.should_succeed = false,
.expected_error_message = "Cannot find selected column: typo"},

SelectTestParam{.test_name = "SelectNestedField",
.create_schema = []() { return AddressSchema(); },
.select_fields = {"address.street"},
Expand Down
7 changes: 7 additions & 0 deletions src/iceberg/test/table_scan_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,13 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) {
}
}

TEST_P(TableScanTest, SelectMissingColumnFails) {
ICEBERG_UNWRAP_OR_FAIL(auto builder,
DataTableScanBuilder::Make(table_metadata_, file_io_));
builder->Select({"id", "typo"});
EXPECT_THAT(builder->Build(), IsError(ErrorKind::kInvalidArgument));
}

INSTANTIATE_TEST_SUITE_P(TableScanVersions, TableScanTest, testing::Values(1, 2, 3));

} // namespace iceberg
Loading