diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index e6ead45b..94c84505 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -337,12 +337,24 @@ report_parse_error(UC const *p, parse_error error) {
 // spans (read only by the rare digit_comp slow path) are not materialized,
 // which keeps the fat parsed_number_string_t off the hot path. The caller
 // re-parses with store_spans=true if the slow path is actually reached.
-template <bool basic_json_fmt, typename UC>
+//
+// has_separator is a *compile-time* flag (the opposite choice from store_spans,
+// and deliberately so): the separator-aware code paths are an opt-in feature
+// that most callers never enable. Gating them on a template parameter means
+// the has_separator==false instantiation -- the one selected whenever no
+// separator is configured -- contains no separator handling at all: no
+// separator comparison ever enters a digit loop, and the SIMD eight-digit
+// fast path stays intact. The has_separator==true instantiation is executed
+// only when a separator is set. See parse_number_string_options for the
+// runtime->compile-time dispatch.
+template <bool basic_json_fmt, bool has_separator, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
                     bool store_spans = true) noexcept {
   chars_format const fmt = detail::adjust_for_feature_macros(options.format);
   UC const decimal_point = options.decimal_point;
+  UC const separator = options.digit_separator;
+  (void)separator; // unused when has_separator == false
 
   parsed_number_string_t<UC> answer;
   answer.valid = false;
@@ -375,16 +387,19 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
   UC const *const start_digits = p;
 
   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
-
-  // Straight-line unroll of the integer-part scan: most integer parts are
-  // 1-5 digits, so peeling the first iterations eliminates the loop back-edge
-  // for the common case. Semantics are identical to the original `while` loop:
-  // i = 10*i + digit, advancing p.
-  if ((p != pend) && is_integer(*p)) {
-    i = uint64_t(*p - UC('0'));
-    ++p;
+  int64_t digit_count = 0;
+  // Points at the first actual digit (== start_digits when no separator
+  // precedes it). Used only by the basic_json leading-zero check.
+  UC const *first_digit_ptr = start_digits;
+  (void)first_digit_ptr; // only read in the basic_json_fmt path
+
+  FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
+    // Straight-line unroll of the integer-part scan: most integer parts are
+    // 1-5 digits, so peeling the first iterations eliminates the loop back-edge
+    // for the common case. Semantics are identical to the original `while`
+    // loop: i = 10*i + digit, advancing p.
     if ((p != pend) && is_integer(*p)) {
-      i = 10 * i + uint64_t(*p - UC('0'));
+      i = uint64_t(*p - UC('0'));
       ++p;
       if ((p != pend) && is_integer(*p)) {
         i = 10 * i + uint64_t(*p - UC('0'));
@@ -395,29 +410,58 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
           if ((p != pend) && is_integer(*p)) {
             i = 10 * i + uint64_t(*p - UC('0'));
             ++p;
-            while ((p != pend) && is_integer(*p)) {
-              // a multiplication by 10 is cheaper than an arbitrary integer
-              // multiplication
-              i = 10 * i +
-                  uint64_t(*p - UC('0')); // might overflow, handled later
+            if ((p != pend) && is_integer(*p)) {
+              i = 10 * i + uint64_t(*p - UC('0'));
               ++p;
+              while ((p != pend) && is_integer(*p)) {
+                // a multiplication by 10 is cheaper than an arbitrary integer
+                // multiplication
+                i = 10 * i +
+                    uint64_t(*p - UC('0')); // might overflow, handled later
+                ++p;
+              }
             }
           }
         }
       }
     }
+    digit_count = int64_t(p - start_digits);
+  }
+  else {
+    // Separator-aware scan: a configured digit separator (e.g. '\'') may appear
+    // between digits. It is skipped and does not contribute to the value or the
+    // digit count, but it is retained in the integer span below so the overflow
+    // re-scan can re-tokenize correctly.
+    while (p != pend) {
+      if (*p == separator) {
+        ++p;
+        continue;
+      }
+      if (!is_integer(*p)) {
+        break;
+      }
+      if (digit_count == 0) {
+        first_digit_ptr = p;
+      }
+      i = 10 * i + uint64_t(*p - UC('0')); // might overflow, handled later
+      ++p;
+      ++digit_count;
+    }
   }
   UC const *const end_of_integer_part = p;
-  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   if (store_spans) {
-    answer.integer = span<UC const>(start_digits, size_t(digit_count));
+    // The span keeps the raw characters (separators included) so the overflow
+    // re-scan below can re-tokenize correctly; for has_separator == false the
+    // length equals digit_count.
+    answer.integer = span<UC const>(start_digits,
+                                    size_t(end_of_integer_part - start_digits));
   }
   FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
     // at least 1 digit in integer part, without leading zeros
     if (digit_count == 0) {
       return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
     }
-    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+    if ((*first_digit_ptr == UC('0') && digit_count > 1)) {
       return report_parse_error<UC>(start_digits,
                                     parse_error::leading_zeros_in_integer_part);
     }
@@ -428,20 +472,40 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
   if (has_decimal_point) {
     ++p;
     UC const *before = p;
-    // can occur at most twice without overflowing, but let it occur more, since
-    // for integers with many digits, digit parsing is the primary bottleneck.
-    loop_parse_if_eight_digits(p, pend, i);
+    int64_t fractional_digit_count = 0;
+    FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
+      // can occur at most twice without overflowing, but let it occur more,
+      // since for integers with many digits, digit parsing is the primary
+      // bottleneck.
+      loop_parse_if_eight_digits(p, pend, i);
 
-    while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - UC('0'));
-      ++p;
-      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+      while ((p != pend) && is_integer(*p)) {
+        uint8_t digit = uint8_t(*p - UC('0'));
+        ++p;
+        i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+      }
+      fractional_digit_count = int64_t(p - before);
     }
-    exponent = before - p;
+    else {
+      while (p != pend) {
+        if (*p == separator) {
+          ++p;
+          continue;
+        }
+        if (!is_integer(*p)) {
+          break;
+        }
+        uint8_t digit = uint8_t(*p - UC('0'));
+        ++p;
+        i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+        ++fractional_digit_count;
+      }
+    }
+    exponent = -fractional_digit_count;
     if (store_spans) {
       answer.fraction = span<UC const>(before, size_t(p - before));
     }
-    digit_count -= exponent;
+    digit_count += fractional_digit_count;
   }
   FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
     // at least 1 digit in fractional part
@@ -483,12 +547,30 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
       // Otherwise, we will be ignoring the 'e'.
       p = location_of_e;
     } else {
-      while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - UC('0'));
-        if (exp_number < 0x10000000) {
-          exp_number = 10 * exp_number + digit;
+      FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
+        while ((p != pend) && is_integer(*p)) {
+          uint8_t digit = uint8_t(*p - UC('0'));
+          if (exp_number < 0x10000000) {
+            exp_number = 10 * exp_number + digit;
+          }
+          ++p;
+        }
+      }
+      else {
+        while (p != pend) {
+          if (*p == separator) {
+            ++p;
+            continue;
+          }
+          if (!is_integer(*p)) {
+            break;
+          }
+          uint8_t digit = uint8_t(*p - UC('0'));
+          if (exp_number < 0x10000000) {
+            exp_number = 10 * exp_number + digit;
+          }
+          ++p;
         }
-        ++p;
       }
       if (neg_exp) {
         exp_number = -exp_number;
@@ -514,9 +596,12 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
     // It is possible that the integer had an overflow.
     // We have to handle the case where we have 0.0000somenumber.
     // We need to be mindful of the case where we only have zeroes...
-    // E.g., 0.000000000...000.
+    // E.g., 0.000000000...000. The `has_separator &&` guard below is a
+    // compile-time constant, so this loop is identical to the original when the
+    // feature is disabled.
     UC const *start = start_digits;
-    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
+                               (has_separator && *start == separator))) {
       if (*start == UC('0')) {
         digit_count--;
       }
@@ -537,20 +622,60 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
         p = answer.integer.ptr;
         UC const *int_end = p + answer.integer.len();
         uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
-        while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-          i = i * 10 + uint64_t(*p - UC('0'));
-          ++p;
+        FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
+          while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+            i = i * 10 + uint64_t(*p - UC('0'));
+            ++p;
+          }
+          if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+            exponent = end_of_integer_part - p + exp_number;
+          } else { // We have a value with a fractional component.
+            p = answer.fraction.ptr;
+            UC const *frac_end = p + answer.fraction.len();
+            while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+              i = i * 10 + uint64_t(*p - UC('0'));
+              ++p;
+            }
+            exponent = answer.fraction.ptr - p + exp_number;
+          }
         }
-        if (i >= minimal_nineteen_digit_integer) { // We have a big integer
-          exponent = end_of_integer_part - p + exp_number;
-        } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          UC const *frac_end = p + answer.fraction.len();
-          while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+        else {
+          // Separator-aware re-scan: separators are skipped and excluded from
+          // the digit counts that determine the exponent.
+          while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+            if (*p == separator) {
+              ++p;
+              continue;
+            }
             i = i * 10 + uint64_t(*p - UC('0'));
             ++p;
           }
-          exponent = answer.fraction.ptr - p + exp_number;
+          if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+            int64_t remaining_integer_digits = 0;
+            while (p != int_end) {
+              if (*p == separator) {
+                ++p;
+                continue;
+              }
+              ++p;
+              ++remaining_integer_digits;
+            }
+            exponent = remaining_integer_digits + exp_number;
+          } else { // We have a value with a fractional component.
+            p = answer.fraction.ptr;
+            UC const *frac_end = p + answer.fraction.len();
+            int64_t fraction_digits_consumed = 0;
+            while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+              if (*p == separator) {
+                ++p;
+                continue;
+              }
+              i = i * 10 + uint64_t(*p - UC('0'));
+              ++p;
+              ++fraction_digits_consumed;
+            }
+            exponent = exp_number - fraction_digits_consumed;
+          }
         }
         // We have now corrected both exponent and i, to a truncated value
       }
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 40551376..c1fc86b8 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -70,15 +70,35 @@ using from_chars_result = from_chars_result_t<char>;
 
 template <typename UC> struct parse_options_t {
   constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
-                                     UC dot = UC('.'), int b = 10)
-      : format(fmt), decimal_point(dot), base(b) {}
+                                     UC dot = UC('.'), int b = 10,
+                                     UC sep = UC('\0'), uint8_t opts = 0)
+      : format(fmt), decimal_point(dot), digit_separator(sep),
+        format_options(opts), base(b) {}
+
+  // New constructor parameters are appended with defaults, so existing calls
+  // keep their meaning. Member order differs on purpose: for UC == char the
+  // two new single-byte fields land in the padding that already existed
+  // between decimal_point and base, which is meant to keep the struct at 16
+  // bytes and passed in registers (ARM64/x86-64). Do not reorder the members.
 
   /** Which number formats are accepted */
   chars_format format;
   /** The character used as decimal point */
   UC decimal_point;
+  /** The character used as digit separator (e.g. '\''). Use '\0' to disable.
+   * When disabled (the default), parsing uses the has_separator == false
+   * instantiation of parse_number_string, whose digit loops contain no
+   * separator checks; the only added cost is one test of this field. */
+  UC digit_separator;
+  /** Additional format options (bitmask), see the static flags below. */
+  uint8_t format_options;
   /** The base used for integers */
   int base;
+
+  /** Consume a leading "0x"/"0X"/"0b"/"0B" before parsing. The prefix does not
+   * select a base: what follows is parsed as if no prefix had been present.
+   * NOTE(review): odr-use before C++17 needs an out-of-class definition. */
+  static constexpr uint8_t skip_prefix = 1;
 };
 
 using parse_options = parse_options_t<char>;
@@ -197,6 +217,12 @@ using parse_options = parse_options_t<char>;
 #define fastfloat_really_inline inline __attribute__((always_inline))
 #endif
 
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#define FASTFLOAT_NOINLINE __declspec(noinline)
+#else // other compilers: GNU-style attributes, additionally hinting `cold`
+#define FASTFLOAT_NOINLINE __attribute__((noinline, cold))
+#endif // FASTFLOAT_VISUAL_STUDIO
+
 // Branch-probability hint marking the rare slow-path branches as cold, so the
 // optimizer keeps the out-of-line slow-path re-parse off the hot path (and does
 // not duplicate the force-inlined hot scanner into the caller, which bloated
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 5d11dfad..d05895ff 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -139,7 +139,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
 
 template <typename T> struct from_chars_caller {
   template <typename UC>
-  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  fastfloat_really_inline FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, T &value,
        parse_options_t<UC> options) noexcept {
     return from_chars_advanced(first, last, value, options);
@@ -149,7 +149,7 @@ template <typename T> struct from_chars_caller {
 #ifdef __STDCPP_FLOAT32_T__
 template <> struct from_chars_caller<std::float32_t> {
   template <typename UC>
-  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  fastfloat_really_inline FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, std::float32_t &value,
        parse_options_t<UC> options) noexcept {
     // if std::float32_t is defined, and we are in C++23 mode; macro set for
@@ -166,7 +166,7 @@ template <> struct from_chars_caller<std::float32_t> {
 #ifdef __STDCPP_FLOAT64_T__
 template <> struct from_chars_caller<std::float64_t> {
   template <typename UC>
-  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  fastfloat_really_inline FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, std::float64_t &value,
        parse_options_t<UC> options) noexcept {
     // if std::float64_t is defined, and we are in C++23 mode; macro set for
@@ -289,6 +289,39 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
   return answer;
 }
 
+template <bool bjf, typename UC>
+FASTFLOAT_NOINLINE FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+parse_number_string_with_separator(UC const *first, UC const *last,
+                                   parse_options_t<UC> options,
+                                   bool store_spans) noexcept {
+  return parse_number_string<bjf, true, UC>(first, last, options, store_spans);
+}
+
+// Runtime -> compile-time dispatch over the two template flags of
+// parse_number_string: `bjf` selects basic_json_fmt and a non-NUL
+// options.digit_separator selects has_separator. The has_separator==true
+// instantiation is reached here only through
+// parse_number_string_with_separator above, a FASTFLOAT_NOINLINE wrapper, so
+// the separator-aware loops stay out of line. With the default '\0' separator
+// the only added work is the single fastfloat_unlikely test below; store_spans
+// is forwarded unchanged in every case.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+parse_number_string_options(UC const *first, UC const *last,
+                            parse_options_t<UC> options, bool bjf,
+                            bool store_spans) noexcept {
+  if fastfloat_unlikely (options.digit_separator != UC('\0')) {
+    return bjf ? parse_number_string_with_separator<true, UC>(
+                     first, last, options, store_spans)
+               : parse_number_string_with_separator<false, UC>(
+                     first, last, options, store_spans);
+  }
+  return bjf ? parse_number_string<true, false, UC>(first, last, options,
+                                                    store_spans)
+             : parse_number_string<false, false, UC>(first, last, options,
+                                                     store_spans);
+}
+
 // Slow path: re-parse materializing the integer/fraction spans the hot no-span
 // parse skipped, then run the full algorithm. The two callers reach it only
 // through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off
@@ -301,8 +334,7 @@ FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 parse_number_slow_path(UC const *first, UC const *last, T &value,
                        parse_options_t<UC> options, bool bjf) noexcept {
   parsed_number_string_t<UC> pns =
-      bjf ? parse_number_string<true, UC>(first, last, options, true)
-          : parse_number_string<false, UC>(first, last, options, true);
+      parse_number_string_options(first, last, options, bjf, true);
   return from_chars_advanced(pns, value);
 }
 
@@ -336,8 +368,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
   // parsed_number_string_t off the hot path. store_spans is a runtime argument,
   // so this reuses the single parse_number_string instantiation.
   parsed_number_string_t<UC> pns =
-      bjf ? parse_number_string<true, UC>(first, last, options, false)
-          : parse_number_string<false, UC>(first, last, options, false);
+      parse_number_string_options(first, last, options, bjf, false);
   if (!pns.valid) {
     if (uint64_t(fmt & chars_format::no_infnan)) {
       answer.ec = std::errc::invalid_argument;
@@ -539,6 +570,13 @@ template <typename T, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
                     parse_options_t<UC> options) noexcept {
+  if (((options.format_options & parse_options_t<UC>::skip_prefix) != 0) &&
+      (last - first >= 2) && (*first == UC('0'))) {
+    UC const c_low = UC(first[1] | UC(0x20));
+    if (c_low == UC('x') || c_low == UC('b')) {
+      first += 2;
+    }
+  }
   return from_chars_advanced_caller<
       size_t(is_supported_float_type<T>::value) +
       2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
diff --git a/tests/basictest.cpp b/tests/basictest.cpp
index dba36e8a..5a7106cb 100644
--- a/tests/basictest.cpp
+++ b/tests/basictest.cpp
@@ -681,6 +681,47 @@ TEST_CASE("decimal_point_parsing") {
   }
 }
 
+TEST_CASE("digit_separator") {
+  double result; // written by from_chars_advanced inside the lambda below
+  fast_float::parse_options options{};
+  options.digit_separator = '_'; // every input below uses '_' as separator
+  auto parse = [&](std::string const &input, double expected) {
+    auto answer = fast_float::from_chars_advanced(
+        input.data(), input.data() + input.size(), result, options);
+    CHECK_MESSAGE(answer.ec == std::errc(), "expected parse success");
+    CHECK_MESSAGE(answer.ptr == input.data() + input.size(),
+                  "Parsing should have stopped at end");
+    CHECK_EQ(result, expected);
+  };
+  parse("1_000", 1000.0);          // separator in the integer part
+  parse("1.00_5", 1.005);          // separator in the fraction
+  parse("1e1_0", 1e10);            // separator in the exponent
+  parse("1_5e1_2", 15e12);         // integer part and exponent
+  parse("1_5.0_5e1_2", 15.05e12);  // all three parts at once
+  // > 19 digits overall with separators (last: 19 significant after zeros)
+  parse("1_000_000_000_000_000_000_000", 1e21);
+  parse("123_456_789_012_345_678_901_234.5", 123456789012345678901234.5);
+  parse("0.0000000000000000000_1234567890123456789", 1.234567890123456789e-20);
+}
+
+TEST_CASE("skip_prefix") {
+  double result; // written by from_chars_advanced inside the lambda below
+  fast_float::parse_options options{};
+  options.format_options = fast_float::parse_options::skip_prefix;
+  auto parse = [&](std::string const &input, double expected) {
+    auto answer = fast_float::from_chars_advanced(
+        input.data(), input.data() + input.size(), result, options);
+    CHECK_MESSAGE(answer.ec == std::errc(), "expected parse success");
+    CHECK_EQ(result, expected); // NOTE(review): answer.ptr is not checked here
+  };
+  // the two prefix characters are skipped; the rest is parsed as decimal
+  parse("0x10", 10.0); // lower-case hex-style prefix
+  parse("0X25", 25.0); // upper-case hex-style prefix
+  parse("0b11", 11.0); // lower-case binary-style prefix
+  parse("0B11", 11.0); // upper-case binary-style prefix
+  parse("42", 42.0); // no prefix present, input is parsed as-is
+}
+
 TEST_CASE("issue19") {
   std::string const input = "234532.3426362,7869234.9823,324562.645";
   double result;
diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp
index 1ba0d5ae..a1135739 100644
--- a/tests/json_fmt.cpp
+++ b/tests/json_fmt.cpp
@@ -131,7 +131,7 @@ int main() {
   for (std::size_t i = 0; i < reject.size(); ++i) {
     auto const &f = reject[i].input;
     auto const &expected_reason = reject[i].reason;
-    auto answer = fast_float::parse_number_string<true>(
+    auto answer = fast_float::parse_number_string<true, false>(
         f.data(), f.data() + f.size(),
         fast_float::parse_options(
             fast_float::chars_format::json |