opendocument-app · andiwand · Jun 25, 2026 · chatgpt-codex-connector · Jun 25, 2026
diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp
@@ -176,6 +176,33 @@ std::string svg_path_fragment(const pdf::PathElement &path,
   return std::move(f).str();
 }
 
+/// Serialize an image to an SVG `<image>` fragment in the page viewBox; returns
+/// "" when `image.data` is empty. The image fills the unit square in user space
+/// (ISO 32000-1 8.10.5); `flip * image.transform * to_box` maps that square —
+/// flipped vertically, as the image's first row is its top and SVG is y-down —
+/// into the page box. A non-empty `clip_id` is referenced through `clip-path`.
+std::string svg_image_fragment(const pdf::ImageElement &image,
+                               const util::math::Transform2D &to_box,
+                               const std::string &clip_id) {
+  if (image.data.empty()) {
+    return {}; // no pass-through bytes: emit nothing
+  }
+  // image natural box [0,1] (y-down) -> PDF unit square (y-up) -> user -> box.
+  constexpr util::math::Transform2D flip =
+      util::math::Transform2D::scaling_translation(1, -1, 0, 1);
+  const util::math::Transform2D m = flip * image.transform * to_box;
+  // Width/height are 1: the matrix alone sizes and places the unit square.
+  std::ostringstream f;
+  f << R"(<image width="1" height="1" preserveAspectRatio="none" transform="matrix()"
+    << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << round2(m.e)
+    << ',' << round2(m.f) << ")\""; // only e and f pass through round2
+  if (!clip_id.empty()) {
+    f << " clip-path=\"url(#" << clip_id << ")\"";
+  }
+  f << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
+  return std::move(f).str();
+}
+
 /// Registers a page's clip regions as nested `<clipPath>` defs, deduplicating
 /// shared prefixes. PDF's current clip is the *intersection* of an ordered list
 /// of regions; SVG expresses intersection by chaining `clip-path` from one
@@ -330,9 +357,9 @@ class HtmlServiceImpl final : public HtmlService {
     std::string glyph_classes;
     std::string glyph_text;
   };
-  // One painted path, already serialized to an SVG `<path .../>` fragment in
-  // the page's viewBox (PDF points, y-down). Contiguous paths share one `<svg>`
-  // at write time.
+  // One vector item — a painted `<path>` or an `<image>`, despite the name —
+  // already serialized to an SVG fragment in the page's viewBox (PDF points,
+  // y-down). Contiguous vector items share one `<svg>` at write time.
   struct PathOut {
     std::string svg;
   };
@@ -501,6 +528,17 @@ class HtmlServiceImpl final : public HtmlService {
           continue;
         }
 
+        // An image XObject: an `<image>` placed by the CTM, in the page `<svg>`
+        // alongside the paths (so it layers by paint order).
+        if (const auto *image = std::get_if<pdf::ImageElement>(&element)) {
+          const std::string clip_id = clips.register_clip(image->clip, to_box);
+          std::string fragment = svg_image_fragment(*image, to_box, clip_id);
+          if (!fragment.empty()) {
+            page_out.items.push_back(PathOut{std::move(fragment)});
+          }
+          continue;
+        }
+
         const pdf::TextElement &text = std::get<pdf::TextElement>(element);
         // The font index is non-zero when an embedded font lets us render
         // the actual glyphs; 0 falls through to the legacy path.

diff --git a/src/odr/internal/pdf/pdf_document_element.hpp b/src/odr/internal/pdf/pdf_document_element.hpp
@@ -123,6 +123,13 @@ struct XObject final : Element {
   /// Form XObject only: the decoded (filter-applied) content stream, read
   /// eagerly at parse time so text extraction needs no parser handle.
   std::string content;
+
+  /// Image XObject only: the encoded image bytes passed through to the browser
+  /// (stage 4.5: JPEG / `DCTDecode`), with `image_mime` naming the codec. Empty
+  /// for an image whose codec is not yet a pass-through (Flate/LZW raster,
+  /// image masks — later stages) and for non-image XObjects, so `Do` skips it.
+  std::string image_data;
+  std::string image_mime;
 };
 
 /// A non-owning view over a string of PDF character codes, splitting it into

diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp
@@ -540,10 +540,29 @@ XObject *parse_x_object(State &state, const ObjectReference &reference) {
                                   ? dictionary["Subtype"].as_name()
                                   : "";
   if (subtype == "Image") {
-    // Image XObjects carry raster data, not a content stream: recognized but
-    // not decoded until stage 4 (and `read_decoded_stream` would throw on the
-    // image codec anyway).
     x_object->subtype = XObject::Subtype::image;
+    // Stage 4.5: pass a JPEG (`DCTDecode`) through to the browser undecoded.
+    // `/ImageMask` stencils and non-JPEG raster codecs are later stages; their
+    // bytes stay empty so `Do` skips them. NOTE(review): `/Mask` is unchecked.
+    const bool image_mask =
+        dictionary.get("ImageMask").as_bool_opt().value_or(false);
+    if (!image_mask) {
+      std::string raw = parser.read_object_stream(object);
+      Object filter;
+      Object decode_parms;
+      if (dictionary.has_key("Filter")) {
+        filter = parser.deep_resolve_object_copy(dictionary["Filter"]);
+      }
+      if (dictionary.has_key("DecodeParms")) {
+        decode_parms =
+            parser.deep_resolve_object_copy(dictionary["DecodeParms"]);
+      }
+      DecodeResult result = decode(filter, decode_parms, std::move(raw));
+      if (result.stopped_at_filter == "DCTDecode") {
+        x_object->image_data = std::move(result.data);
+        x_object->image_mime = "image/jpeg";
+      }
+    }
     return x_object;
   }
   if (subtype != "Form") {

diff --git a/src/odr/internal/pdf/pdf_page_element.hpp b/src/odr/internal/pdf/pdf_page_element.hpp
@@ -86,9 +86,22 @@ struct PathElement {
   double dash_phase{0};
 };
 
-/// A single page-content element in paint (z) order: a shown text segment or a
-/// painted path. Images, shadings and patterns join this variant in later
+/// One image XObject painted by `Do`, placed by the CTM in effect when it was
+/// invoked (ISO 32000-1 8.10.5): the image fills the unit square in user space,
+/// which `transform` maps. `data` holds the still-encoded bytes handed to the
+/// browser as-is (stage 4.5: JPEG / `DCTDecode`), with `mime` naming the codec.
+/// `clip` is the clip snapshotted at `Do` time, as for a path.
+struct ImageElement {
+  /// CTM at `Do` time: maps the image's unit square to user space.
+  util::math::Transform2D transform;
+  std::vector<ClipPath> clip; // clip in effect at `Do` time
+  std::string data; // encoded image bytes (e.g. a JPEG)
+  std::string mime; // e.g. "image/jpeg"
+};
+
+/// A single page-content element in paint (z) order: a shown text segment, a
+/// painted path or an image. Shadings and patterns join this variant in later
 /// stage-4 PRs.
-using PageElement = std::variant<TextElement, PathElement>;
+using PageElement = std::variant<TextElement, PathElement, ImageElement>;
 
 } // namespace odr::internal::pdf
diff --git a/src/odr/internal/pdf/pdf_page_extractor.cpp b/src/odr/internal/pdf/pdf_page_extractor.cpp
@@ -450,8 +450,9 @@ void begin_marked_content(const GraphicsOperator &op,
 /// Invoke a form XObject named by `Do`: save the state, concatenate the form's
 /// `/Matrix` onto the CTM, run its content with the form's own `/Resources`
 /// (falling back to the enclosing scope), then restore (ISO 32000-1 8.10.1).
-/// `/BBox` clipping is deferred (text-only). Image and unknown XObjects are
-/// skipped, and a form already on the render stack is skipped (cycle guard).
+/// `/BBox` clips the form's content. An image XObject emits an `ImageElement`
+/// (when its codec passes through); unknown subtypes are skipped, and a form
+/// already on the render stack is skipped (cycle guard).
 void invoke_x_object(const std::string &name, const Resources &resources,
                      GraphicsState &state, std::vector<PageElement> &out,
                      const Logger &logger, std::set<std::string> &warned,
@@ -466,8 +467,22 @@ void invoke_x_object(const std::string &name, const Resources &resources,
   }
 
   const XObject *x_object = it->second;
+  if (x_object->subtype == XObject::Subtype::image) {
+    // An image is placed by the CTM in effect (its unit square maps to user
+    // space), under the current clip. Only codecs with bytes ready for the
+    // browser carry `image_data` (stage 4.5: JPEG); the rest are skipped.
+    if (!x_object->image_data.empty()) {
+      ImageElement image;
+      image.transform = state.current().general.transform_matrix;
+      image.clip = state.current().clip;
+      image.data = x_object->image_data;
+      image.mime = x_object->image_mime;
+      out.push_back(std::move(image));
+    }
+    return;
+  }
   if (x_object->subtype != XObject::Subtype::form) {
-    return; // image XObjects are stage 4; unknown subtypes are inexecutable
+    return; // unknown subtypes are inexecutable
   }
   if (!active.insert(x_object).second) {
     ODR_WARNING(logger, "pdf: cyclic form XObject invocation, skipping");

diff --git a/test/src/internal/pdf/pdf_page_extractor.cpp b/test/src/internal/pdf/pdf_page_extractor.cpp
@@ -781,6 +781,76 @@ TEST(PdfPageExtractor, device_color_clears_color_space) {
   EXPECT_DOUBLE_EQ(p.fill_color.rgb[1], 0.0);
 }
 
+// --- stage 4.5: image XObjects (JPEG pass-through) ------------------------
+
+namespace {
+// Test helper: an image XObject holding `data` as JPEG pass-through bytes.
+XObject jpeg_x_object(std::string data) {
+  XObject x_object;
+  x_object.subtype = XObject::Subtype::image;
+  x_object.image_data = std::move(data);
+  x_object.image_mime = "image/jpeg";
+  return x_object;
+}
+
+} // namespace
+
+// `Do` on a pass-through image XObject emits one `ImageElement` whose
+// `transform` is the CTM set by `cm`, carrying the bytes and MIME type verbatim.
+TEST(PdfPageExtractor, image_xobject_emitted_at_ctm) {
+  XObject image = jpeg_x_object("JFIF-bytes");
+  Resources res;
+  res.x_object["Im0"] = &image;
+  // `cm` operands are a b c d e f = 2 0 0 3 10 20; b and c are not asserted.
+  const auto page =
+      extract_page("q 2 0 0 3 10 20 cm /Im0 Do Q", res, Logger::null());
+  ASSERT_EQ(page.size(), 1);
+  const ImageElement &img = std::get<ImageElement>(page[0]);
+  EXPECT_EQ(img.data, "JFIF-bytes");
+  EXPECT_EQ(img.mime, "image/jpeg");
+  EXPECT_DOUBLE_EQ(img.transform.a, 2); // unit square -> 2 wide
+  EXPECT_DOUBLE_EQ(img.transform.d, 3); // 3 tall
+  EXPECT_DOUBLE_EQ(img.transform.e, 10); // x offset
+  EXPECT_DOUBLE_EQ(img.transform.f, 20); // y offset
+}
+
+// An image XObject with empty `image_data` (its codec is not a pass-through)
+// is skipped: `Do` emits nothing.
+TEST(PdfPageExtractor, image_xobject_without_data_skipped) {
+  XObject image; // image subtype, `image_data` left empty
+  image.subtype = XObject::Subtype::image;
+  Resources res;
+  res.x_object["Im0"] = &image;
+
+  EXPECT_TRUE(extract_page("/Im0 Do", res, Logger::null()).empty());
+}
+
+// An image carries the clip in effect at `Do`, like a path: one `W n` rect
+// gives one clip entry.
+TEST(PdfPageExtractor, image_xobject_carries_clip) {
+  XObject image = jpeg_x_object("bytes");
+  Resources res;
+  res.x_object["Im0"] = &image;
+
+  const auto page =
+      extract_page("0 0 50 50 re W n /Im0 Do", res, Logger::null());
+  ASSERT_EQ(page.size(), 1);
+  EXPECT_EQ(std::get<ImageElement>(page[0]).clip.size(), 1);
+}
+
+// Images interleave with paths in paint order (no text is exercised here).
+TEST(PdfPageExtractor, image_in_paint_order) {
+  XObject image = jpeg_x_object("bytes");
+  Resources res;
+  res.x_object["Im0"] = &image;
+  // A fill (`re f`), then the image, then a stroke (`m l S`).
+  const auto page =
+      extract_page("0 0 10 10 re f /Im0 Do 5 5 m 6 6 l S", res, Logger::null());
+  ASSERT_EQ(page.size(), 3);
+  EXPECT_TRUE(std::holds_alternative<PathElement>(page[0]));
+  EXPECT_TRUE(std::holds_alternative<ImageElement>(page[1]));
+  EXPECT_TRUE(std::holds_alternative<PathElement>(page[2]));
+}
+
 // --- stage 4.3: clipping --------------------------------------------------
 
 // A `W n` clip rect limits a later fill: the fill carries the clip region (the