diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp
index 953e4b23..7f1e06ff 100644
--- a/src/odr/internal/html/pdf_file.cpp
+++ b/src/odr/internal/html/pdf_file.cpp
@@ -177,6 +177,33 @@ std::string svg_path_fragment(const pdf::PathElement &path,
   return std::move(f).str();
 }
 
+/// Render an image XObject as an SVG `<image>` in the page viewBox, or "" if it
+/// has no pass-through bytes. The image fills the user-space unit square (ISO
+/// 32000-1 8.9.4); a vertical flip (first row is the top, SVG draws y-down),
+/// the CTM and `to_box` place it. A non-empty `clip_id` becomes `clip-path`.
+std::string svg_image_fragment(const pdf::ImageElement &image,
+                               const util::math::Transform2D &to_box,
+                               const std::string &clip_id) {
+  if (image.data.empty()) {
+    return {};
+  }
+  // Chain: image box [0,1] (y-down) -> PDF unit square (y-up) -> user -> box.
+  constexpr util::math::Transform2D y_flip =
+      util::math::Transform2D::scaling_translation(1, -1, 0, 1);
+  const util::math::Transform2D place = y_flip * image.transform * to_box;
+
+  std::ostringstream svg;
+  svg << "<image width=\"1\" height=\"1\" preserveAspectRatio=\"none\""
+      << " transform=\"matrix(" << place.a << ',' << place.b << ',' << place.c
+      << ',' << place.d << ',' << round2(place.e) << ',' << round2(place.f)
+      << ")\"";
+  if (!clip_id.empty()) {
+    svg << " clip-path=\"url(#" << clip_id << ")\"";
+  }
+  svg << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
+  return std::move(svg).str();
+}
+
 /// Registers a page's clip regions as nested `<clipPath>` defs, deduplicating
 /// shared prefixes. PDF's current clip is the *intersection* of an ordered list
 /// of regions; SVG expresses intersection by chaining `clip-path` from one
@@ -331,9 +358,9 @@ class HtmlServiceImpl final : public HtmlService {
     std::string glyph_classes;
     std::string glyph_text;
   };
-  // One painted path, already serialized to an SVG `<path .../>` fragment in
-  // the page's viewBox (PDF points, y-down). Contiguous paths share one `<svg>`
-  // at write time.
+  // One vector item (a painted `<path>` or an `<image>`; the name predates
+  // images), already serialized to an SVG fragment in the page's viewBox (PDF
+  // points, y-down). Contiguous vector items share one `<svg>` at write time.
   struct PathOut {
     std::string svg;
   };
@@ -562,6 +589,17 @@ class HtmlServiceImpl final : public HtmlService {
           continue;
         }
 
+        // An image XObject: an `<image>` placed by the CTM, in the page `<svg>`
+        // alongside the paths (so it layers by paint order).
+        if (const auto *image = std::get_if<pdf::ImageElement>(&element)) {
+          const std::string clip_id = clips.register_clip(image->clip, to_box);
+          std::string fragment = svg_image_fragment(*image, to_box, clip_id);
+          if (!fragment.empty()) {
+            page_out.items.push_back(PathOut{std::move(fragment)});
+          }
+          continue;
+        }
+
         const pdf::TextElement &text = std::get<pdf::TextElement>(element);
         // TODO(clip text): the active clip is not applied to text. Paths carry
         // a clip snapshot realized as an SVG `<clipPath>`, but text is emitted
diff --git a/src/odr/internal/pdf/pdf_document_element.hpp b/src/odr/internal/pdf/pdf_document_element.hpp
index 32119485..3f094a3e 100644
--- a/src/odr/internal/pdf/pdf_document_element.hpp
+++ b/src/odr/internal/pdf/pdf_document_element.hpp
@@ -123,6 +123,13 @@ struct XObject final : Element {
   /// Form XObject only: the decoded (filter-applied) content stream, read
   /// eagerly at parse time so text extraction needs no parser handle.
   std::string content;
+
+  /// Image XObject only: the encoded image bytes passed through to the browser
+  /// (stage 4.5: JPEG / `DCTDecode`), with `image_mime` naming the codec. Empty
+  /// for an image whose codec is not yet a pass-through (Flate/LZW raster,
+  /// image masks — later stages) and for non-image XObjects, so `Do` skips it.
+  std::string image_data;
+  std::string image_mime;
 };
 
 /// A non-owning view over a string of PDF character codes, splitting it into
diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp
index 3e825787..35fd74eb 100644
--- a/src/odr/internal/pdf/pdf_document_parser.cpp
+++ b/src/odr/internal/pdf/pdf_document_parser.cpp
@@ -549,10 +549,34 @@ XObject *parse_x_object(State &state, const ObjectReference &reference) {
                                   ? dictionary["Subtype"].as_name()
                                   : "";
   if (subtype == "Image") {
-    // Image XObjects carry raster data, not a content stream: recognized but
-    // not decoded until stage 4 (and `read_decoded_stream` would throw on the
-    // image codec anyway).
     x_object->subtype = XObject::Subtype::image;
+    // Stage 4.5: pass a JPEG (`DCTDecode`) image through to the browser
+    // undecoded. `/ImageMask` stencils, color-key masks and the non-JPEG raster
+    // codecs are later stages; leave their bytes empty so `Do` skips them.
+    const bool image_mask =
+        dictionary.get("ImageMask").as_bool_opt().value_or(false);
+    Object filter;
+    if (!image_mask && dictionary.has_key("Filter")) {
+      filter = parser.deep_resolve_object_copy(dictionary["Filter"]);
+    }
+    // Only a JPEG passes straight through to the browser. Gate on the chain's
+    // terminal codec so a non-pass-through raster (e.g. FlateDecode with a
+    // predictor) is left empty without inflating it — that decode is wasted for
+    // a skipped image and can throw on parameters we don't support, which would
+    // otherwise abort the whole document. `Do` skips an image with no bytes.
+    if (!image_mask && terminal_image_codec(filter) == "DCTDecode") {
+      Object decode_parms;
+      if (dictionary.has_key("DecodeParms")) {
+        decode_parms =
+            parser.deep_resolve_object_copy(dictionary["DecodeParms"]);
+      }
+      std::string raw = parser.read_object_stream(object);
+      DecodeResult result = decode(filter, decode_parms, std::move(raw));
+      if (result.stopped_at_filter == "DCTDecode") {
+        x_object->image_data = std::move(result.data);
+        x_object->image_mime = "image/jpeg";
+      }
+    }
     return x_object;
   }
   if (subtype != "Form") {
diff --git a/src/odr/internal/pdf/pdf_filter.cpp b/src/odr/internal/pdf/pdf_filter.cpp
index b706643a..08d11086 100644
--- a/src/odr/internal/pdf/pdf_filter.cpp
+++ b/src/odr/internal/pdf/pdf_filter.cpp
@@ -241,6 +241,27 @@ pdf::DecodeResult pdf::decode(const Object &filter, const Object &decode_parms,
   return result;
 }
 
+std::optional<std::string> pdf::terminal_image_codec(const Object &filter) {
+  // An absent filter, like an empty chain, terminates in no codec at all.
+  if (filter.is_null()) {
+    return std::nullopt;
+  }
+  const bool is_chain = filter.is_array();
+  if (is_chain && filter.as_array().empty()) {
+    return std::nullopt;
+  }
+
+  // A lone filter is its own final entry; a chain ends in its last element.
+  const Object terminal = is_chain ? filter.as_array().back() : filter;
+
+  // Abbreviated names (e.g. `DCT`) are canonicalised before the codec check.
+  std::string codec = canonical_filter_name(terminal.as_string());
+  if (!is_image_codec(codec)) {
+    return std::nullopt;
+  }
+  return codec;
+}
+
 std::string pdf::ascii_hex_decode(const std::string &input) {
   std::string result;
   result.reserve(input.size() / 2);
diff --git a/src/odr/internal/pdf/pdf_filter.hpp b/src/odr/internal/pdf/pdf_filter.hpp
index 32c76c40..2ca1fc67 100644
--- a/src/odr/internal/pdf/pdf_filter.hpp
+++ b/src/odr/internal/pdf/pdf_filter.hpp
@@ -24,6 +24,12 @@ struct DecodeResult {
 DecodeResult decode(const Object &filter, const Object &decode_parms,
                     std::string data);
 
+/// The image codec a `/Filter` chain terminates in: its last entry (or the lone
+/// filter), canonicalised, when that is an image codec such as DCTDecode; else
+/// nullopt (null filter, empty array, or any other last entry). Lets a caller
+/// recognise a pass-through image without inflating a raster it means to skip.
+std::optional<std::string> terminal_image_codec(const Object &filter);
+
 std::string ascii_hex_decode(const std::string &input);
 std::string ascii85_decode(const std::string &input);
 std::string lzw_decode(const std::string &input, Integer early_change = 1);
diff --git a/src/odr/internal/pdf/pdf_object.hpp b/src/odr/internal/pdf/pdf_object.hpp
index 13b109c9..b15d48cb 100644
--- a/src/odr/internal/pdf/pdf_object.hpp
+++ b/src/odr/internal/pdf/pdf_object.hpp
@@ -254,6 +254,7 @@ class Array final {
   [[nodiscard]] const Holder &holder() const { return m_holder; }
 
   [[nodiscard]] std::size_t size() const { return m_holder.size(); }
+  [[nodiscard]] bool empty() const { return m_holder.empty(); }
   [[nodiscard]] Holder::iterator begin() { return m_holder.begin(); }
   [[nodiscard]] Holder::iterator end() { return m_holder.end(); }
   [[nodiscard]] Holder::const_iterator begin() const {
@@ -264,6 +265,11 @@ class Array final {
   Object &operator[](const std::size_t i) { return m_holder.at(i); }
   const Object &operator[](const std::size_t i) const { return m_holder.at(i); }
 
+  Object &front() { return m_holder.front(); }
+  [[nodiscard]] const Object &front() const { return m_holder.front(); }
+  Object &back() { return m_holder.back(); }
+  [[nodiscard]] const Object &back() const { return m_holder.back(); }
+
   void to_stream(std::ostream &) const;
   [[nodiscard]] std::string to_string() const;
 
diff --git a/src/odr/internal/pdf/pdf_page_element.hpp b/src/odr/internal/pdf/pdf_page_element.hpp
index b071c22d..38e419e1 100644
--- a/src/odr/internal/pdf/pdf_page_element.hpp
+++ b/src/odr/internal/pdf/pdf_page_element.hpp
@@ -86,9 +86,22 @@ struct PathElement {
   double dash_phase{0};
 };
 
-/// A single page-content element in paint (z) order: a shown text segment or a
-/// painted path. Images, shadings and patterns join this variant in later
+/// One image XObject painted by `Do`, placed by the CTM in effect when it was
+/// invoked (ISO 32000-1 8.9.4): the image fills the unit square in user space,
+/// which `transform` maps. The encoded bytes pass straight through to the
+/// browser (stage 4.5: JPEG / `DCTDecode`), `mime` naming the codec. The clip
+/// is snapshotted as for a path.
+struct ImageElement {
+  /// CTM at `Do` time: maps the image's unit square to user space.
+  util::math::Transform2D transform;
+  std::vector<ClipPath> clip; // clip regions in effect at `Do` time
+  std::string data; // encoded image bytes (e.g. a JPEG); an owned copy
+  std::string mime; // media type of `data`, e.g. "image/jpeg"
+};
+
+/// A single page-content element in paint (z) order: a shown text segment, a
+/// painted path or an image. Shadings and patterns join this variant in later
 /// stage-4 PRs.
-using PageElement = std::variant<TextElement, PathElement>;
+using PageElement = std::variant<TextElement, PathElement, ImageElement>;
 
 } // namespace odr::internal::pdf
diff --git a/src/odr/internal/pdf/pdf_page_extractor.cpp b/src/odr/internal/pdf/pdf_page_extractor.cpp
index ce069371..0ed7b12d 100644
--- a/src/odr/internal/pdf/pdf_page_extractor.cpp
+++ b/src/odr/internal/pdf/pdf_page_extractor.cpp
@@ -450,8 +450,9 @@ void begin_marked_content(const GraphicsOperator &op,
 /// Invoke a form XObject named by `Do`: save the state, concatenate the form's
 /// `/Matrix` onto the CTM, run its content with the form's own `/Resources`
 /// (falling back to the enclosing scope), then restore (ISO 32000-1 8.10.1).
-/// `/BBox` clipping is deferred (text-only). Image and unknown XObjects are
-/// skipped, and a form already on the render stack is skipped (cycle guard).
+/// `/BBox` clips the form's content. An image XObject emits an `ImageElement`
+/// (when its codec passes through); unknown subtypes are skipped, and a form
+/// already on the render stack is skipped (cycle guard).
 void invoke_x_object(const std::string &name, const Resources &resources,
                      GraphicsState &state, std::vector<PageElement> &out,
                      const Logger &logger, std::set<std::string> &warned,
@@ -466,8 +467,22 @@ void invoke_x_object(const std::string &name, const Resources &resources,
   }
 
   const XObject *x_object = it->second;
+  if (x_object->subtype == XObject::Subtype::image) {
+    // An image is placed by the CTM in effect (its unit square maps to user
+    // space), under the current clip. Only codecs with bytes ready for the
+    // browser carry `image_data` (stage 4.5: JPEG); the rest are skipped.
+    if (!x_object->image_data.empty()) {
+      ImageElement image;
+      image.transform = state.current().general.transform_matrix;
+      image.clip = state.current().clip;
+      image.data = x_object->image_data;
+      image.mime = x_object->image_mime;
+      out.push_back(std::move(image));
+    }
+    return;
+  }
   if (x_object->subtype != XObject::Subtype::form) {
-    return; // image XObjects are stage 4; unknown subtypes are inexecutable
+    return; // unknown subtypes are inexecutable
   }
   if (!active.insert(x_object).second) {
     ODR_WARNING(logger, "pdf: cyclic form XObject invocation, skipping");
diff --git a/test/src/internal/pdf/pdf_filter.cpp b/test/src/internal/pdf/pdf_filter.cpp
index eaa89a76..8600bca8 100644
--- a/test/src/internal/pdf/pdf_filter.cpp
+++ b/test/src/internal/pdf/pdf_filter.cpp
@@ -151,6 +151,26 @@ TEST(PdfFilter, decode_stops_at_image_codec) {
       0);
 }
 
+TEST(PdfFilter, terminal_image_codec_identifies_passthrough) {
+  // A chain's last entry, when an image codec, is reported in canonical form
+  // without any decoding taking place.
+  const auto ascii85_then_dct = array({name("ASCII85Decode"), name("DCT")});
+  EXPECT_EQ(terminal_image_codec(ascii85_then_dct), "DCTDecode");
+  EXPECT_EQ(terminal_image_codec(name("DCTDecode")), "DCTDecode");
+  EXPECT_EQ(terminal_image_codec(name("DCT")), "DCTDecode");
+  EXPECT_EQ(terminal_image_codec(name("JPXDecode")), "JPXDecode");
+}
+
+TEST(PdfFilter, terminal_image_codec_none_for_decodable_chain) {
+  // No filter, an empty chain, or a last entry that is not an image codec:
+  // nothing to pass through.
+  const auto dct_then_flate = array({name("DCTDecode"), name("FlateDecode")});
+  EXPECT_FALSE(terminal_image_codec(dct_then_flate).has_value());
+  EXPECT_FALSE(terminal_image_codec(array({})).has_value());
+  EXPECT_FALSE(terminal_image_codec(name("FlateDecode")).has_value());
+  EXPECT_FALSE(terminal_image_codec(Object()).has_value());
+}
+
 TEST(PdfFilter, decode_crypt_identity_passes_through) {
   const DecodeResult result =
       decode(name("Crypt"), dictionary({{"Name", name("Identity")}}), "data");
diff --git a/test/src/internal/pdf/pdf_page_extractor.cpp b/test/src/internal/pdf/pdf_page_extractor.cpp
index d6711abc..93a26003 100644
--- a/test/src/internal/pdf/pdf_page_extractor.cpp
+++ b/test/src/internal/pdf/pdf_page_extractor.cpp
@@ -852,3 +852,73 @@ TEST(PdfPageExtractor, device_color_clears_color_space) {
   EXPECT_DOUBLE_EQ(p.fill_color.rgb[0], 1.0);
   EXPECT_DOUBLE_EQ(p.fill_color.rgb[1], 0.0);
 }
+
+// --- stage 4.5: image XObjects (JPEG pass-through) ------------------------
+
+namespace {
+
+XObject jpeg_x_object(std::string data) {
+  XObject jpeg;
+  jpeg.image_mime = "image/jpeg";
+  jpeg.image_data = std::move(data);
+  jpeg.subtype = XObject::Subtype::image;
+  return jpeg;
+}
+
+} // namespace
+
+// A pass-through image XObject invoked by `Do` yields one `ImageElement` that
+// records the CTM in effect and holds the encoded bytes unchanged.
+TEST(PdfPageExtractor, image_xobject_emitted_at_ctm) {
+  XObject jpeg = jpeg_x_object("JFIF-bytes");
+  Resources resources;
+  resources.x_object["Im0"] = &jpeg;
+
+  const auto elements =
+      extract_page("q 2 0 0 3 10 20 cm /Im0 Do Q", resources, Logger::null());
+  ASSERT_EQ(elements.size(), 1);
+  const auto &placed = std::get<ImageElement>(elements[0]);
+  EXPECT_EQ(placed.mime, "image/jpeg");
+  EXPECT_EQ(placed.data, "JFIF-bytes");
+  EXPECT_DOUBLE_EQ(placed.transform.a, 2);  // x scale: unit square -> 2 wide
+  EXPECT_DOUBLE_EQ(placed.transform.d, 3);  // y scale: 3 tall
+  EXPECT_DOUBLE_EQ(placed.transform.e, 10); // x translation
+  EXPECT_DOUBLE_EQ(placed.transform.f, 20); // y translation
+}
+
+// `Do` on an image XObject without pass-through bytes emits nothing.
+TEST(PdfPageExtractor, image_xobject_without_data_skipped) {
+  XObject bare; // image subtype, `image_data` left empty
+  bare.subtype = XObject::Subtype::image;
+  Resources resources;
+  resources.x_object["Im0"] = &bare;
+
+  const auto elements = extract_page("/Im0 Do", resources, Logger::null());
+  EXPECT_TRUE(elements.empty());
+}
+
+// The clip in effect at `Do` time is snapshotted onto the image, as for a path.
+TEST(PdfPageExtractor, image_xobject_carries_clip) {
+  XObject jpeg = jpeg_x_object("bytes");
+  Resources resources;
+  resources.x_object["Im0"] = &jpeg;
+
+  const auto elements =
+      extract_page("0 0 50 50 re W n /Im0 Do", resources, Logger::null());
+  ASSERT_EQ(elements.size(), 1);
+  EXPECT_EQ(std::get<ImageElement>(elements[0]).clip.size(), 1);
+}
+
+// An image keeps its paint-order slot between the paths drawn around it.
+TEST(PdfPageExtractor, image_in_paint_order) {
+  XObject jpeg = jpeg_x_object("bytes");
+  Resources resources;
+  resources.x_object["Im0"] = &jpeg;
+
+  const auto elements = extract_page("0 0 10 10 re f /Im0 Do 5 5 m 6 6 l S",
+                                     resources, Logger::null());
+  ASSERT_EQ(elements.size(), 3);
+  EXPECT_TRUE(std::holds_alternative<PathElement>(elements[0]));  // rect fill
+  EXPECT_TRUE(std::holds_alternative<ImageElement>(elements[1])); // image
+  EXPECT_TRUE(std::holds_alternative<PathElement>(elements[2]));  // stroke
+}