Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ public interface ClickHouseBinaryFormatWriter {

void setString(int colIndex, String value);

void setString(String column, byte[] value);

void setString(int colIndex, byte[] value);

void setDate(String column, LocalDate value);

void setDate(int colIndex, LocalDate value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,18 @@ public void writeString(String value) throws IOException {
BinaryStreamUtils.writeString(out, value);
}

public void writeString(byte[] value) throws IOException {
BinaryStreamUtils.writeString(out, value);
}

public void writeFixedString(String value, int len) throws IOException {
BinaryStreamUtils.writeFixedString(out, value, len);
}

public void writeFixedString(byte[] value, int len) throws IOException {
SerializerUtils.writeFixedStringBytes(out, value, len);
}

public void writeDate(ZonedDateTime value) throws IOException {
SerializerUtils.writeDate(out, value, value.getZone());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,16 @@ public void setString(int colIndex, String value) {
setValue(colIndex, value);
}

@Override
public void setString(String column, byte[] value) {
setValue(column, value);
}

@Override
public void setString(int colIndex, byte[] value) {
setValue(colIndex, value);
}

@Override
public void setDate(String column, LocalDate value) {
setValue(column, value);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package com.clickhouse.client.api.data_formats;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

/**
* Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes
* to avoid lossy decoding and unnecessary allocations.
* <p>
* <b>This is a mutable structure and must be used with care.</b> To avoid copying, it does not
* duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of
* copying it, and {@link #toByteArray()} returns a direct reference to the backing array rather
* than a defensive copy. Consequently, mutating the source array, the array returned by
* {@link #toByteArray()}, or reading the same value concurrently while it is being modified will
* change the observed value. Callers that need an independent snapshot must copy the bytes
* themselves.
* <p>
* Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only heap
* buffers (with an accessible backing array) are supported today; constructing a value from a
* direct (off-heap) buffer is rejected. The decoded {@link String} produced by {@link #asString()}
* is cached.
*/
public class StringValue {

/** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */
public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

private final ByteBuffer buffer;

private final Charset defaultCharset;

private volatile String cached;

/**
* Creates a value backed by the given bytes. The array is wrapped, not copied, so it must not be
* modified after being passed in.
*
* @param bytes raw value bytes (not null)
*/
public StringValue(byte[] bytes) {
this(bytes, DEFAULT_CHARSET);
}

/**
* Creates a value backed by the given bytes using the provided default charset. The array is wrapped,
* not copied, so it must not be modified after being passed in.
*
* @param bytes raw value bytes (not null)
* @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
*/
public StringValue(byte[] bytes, Charset defaultCharset) {
this(ByteBuffer.wrap(bytes), defaultCharset);
}

/**
* Creates a value backed by the remaining content of the given buffer using the provided default charset.
* The buffer is referenced, not copied, so its content must not be modified afterwards.
*
* @param buffer backing heap buffer (not null); its remaining bytes define the value
* @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
* @throws IllegalArgumentException if the buffer is a direct (off-heap) buffer with no accessible array
*/
public StringValue(ByteBuffer buffer, Charset defaultCharset) {
Objects.requireNonNull(buffer, "buffer cannot be null");
Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String");

if (!buffer.hasArray()) {
throw new IllegalArgumentException("Can work only with heap buffer.");
}

// Keep an independent view so external position/limit changes do not affect this value.
this.buffer = buffer.slice();
this.defaultCharset = defaultCharset;
}

/**
* Returns a read-only view over the raw bytes of this value. The returned buffer is independent
* (its own position/limit) and shares no mutable state with this value.
*
* @return read-only buffer positioned at the first byte of the value
*/
public ByteBuffer asByteBuffer() {
return buffer.asReadOnlyBuffer();
}

/**
* Returns a direct reference to the backing byte array of this value (no copy is made).
* <p>
* The returned array is the live backing storage: mutating it mutates this value, and any change
* to the underlying bytes is reflected here. Callers that need an independent, immutable snapshot
* must copy the result themselves.
*
* @return the backing array holding the value bytes
*/
public byte[] toByteArray() {
return buffer.array();
}

/**
* @return number of bytes in this value
*/
public int size() {
return buffer.remaining();
}

/**
* @return {@code true} if the value has no bytes
*/
public boolean isEmpty() {
return buffer.remaining() == 0;
}

/**
* Decodes the value using the default charset (UTF-8 unless another was provided at construction).
* The result is cached so repeated calls do not allocate a new string.
*
* @return decoded string
*/
public String asString() {
String s = cached;
if (s == null) {
s = decode(defaultCharset);
cached = s;
}
return s;
}

/**
* Decodes the value using the given charset. The result is cached only when the charset matches the
* default charset of this value.
*
* @param charset charset to decode with (not null)
* @return decoded string
*/
public String asString(Charset charset) {
Objects.requireNonNull(charset, "charset cannot be null");
if (charset.equals(defaultCharset)) {
return asString();
}
return decode(charset);
}

private String decode(Charset charset) {
return new String(buffer.array(), charset);
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringValue ignores buffer slice

Medium Severity

For values built from a sliced heap ByteBuffer, toByteArray() and decode() use buffer.array() without arrayOffset, position, or remaining(), so they expose or decode the whole backing array instead of the documented remaining bytes. size() stays correct while bytes and strings disagree.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 3012f42. Configure here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cannot understand your explanation. There is no problem definition. is it a documentation issue ?


@Override
public String toString() {
return asString();
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof StringValue)) {
return false;
}
return buffer.equals(((StringValue) o).buffer);
}

@Override
public int hashCode() {
return buffer.hashCode();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.clickhouse.client.api.ClientException;
import com.clickhouse.client.api.DataTypeUtils;
import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader;
import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.internal.DataTypeConverter;
import com.clickhouse.client.api.internal.MapUtils;
import com.clickhouse.client.api.internal.ServerSettings;
Expand Down Expand Up @@ -532,8 +533,9 @@
}
return (T)array;
} else if (componentType == byte.class) {
if (value instanceof String) {
return (T) ((String) value).getBytes(StandardCharsets.UTF_8);
byte[] bytes = stringLikeToBytes(value);
if (bytes != null) {
return (T) bytes;
} else if (value instanceof InetAddress) {
return (T) ((InetAddress) value).getAddress();
}
Expand Down Expand Up @@ -676,6 +678,24 @@
throw new ClientException("Column of type " + column.getDataType() + " cannot be converted to Instant");
}

/**
* Converts a string-like value into its raw bytes. For a {@link StringValue} the original bytes are
* returned without re-encoding (so binary content is preserved). For a {@link String} the bytes are
* produced using UTF-8, matching the historical behaviour. Returns {@code null} when the value is not
* a string-like type so callers can fall back to other handling.
*
* @param value value to convert
* @return raw bytes or {@code null} if the value is not string-like
*/
public static byte[] stringLikeToBytes(Object value) {
if (value instanceof StringValue) {
return ((StringValue) value).toByteArray();
} else if (value instanceof String) {
return ((String) value).getBytes(StandardCharsets.UTF_8);
}
return null;

Check warning on line 696 in client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Return an empty array instead of null.

See more on https://sonarcloud.io/project/issues?id=ClickHouse_clickhouse-java&issues=AZ7xVs7yBSP9yiGow8s3&open=AZ7xVs7yBSP9yiGow8s3&pullRequest=2887
}

static Instant objectToInstant(Object value) {
if (value instanceof LocalDateTime) {
LocalDateTime dateTime = (LocalDateTime) value;
Expand Down Expand Up @@ -866,6 +886,10 @@
BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value;
if (array.itemType == String.class) {
return (String[]) array.getArray();
} else if (array.itemType == StringValue.class) {
StringValue[] stringValues = (StringValue[]) array.getArray();
return Arrays.stream(stringValues)
.map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new);
} else if (array.itemType == BinaryStreamReader.EnumValue.class) {
BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray();
return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.clickhouse.client.api.ClientException;
import com.clickhouse.client.api.DataTypeUtils;
import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataType;
import com.clickhouse.data.ClickHouseEnum;
Expand Down Expand Up @@ -55,6 +56,8 @@ public class BinaryStreamReader {

private final Class<?> arrayDefaultTypeHint;

private final boolean stringAsBinaryDefault;

private static final int SB_INIT_SIZE = 100;

private ClickHouseColumn lastDataColumn = null;
Expand All @@ -69,7 +72,7 @@ public class BinaryStreamReader {
* @param jsonAsString - use string to serialize/deserialize JSON columns
* @param typeHintMapping - what type use as hint if hint is not set or may not be known.
*/
BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map<ClickHouseDataType, Class<?>> typeHintMapping) {
public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map<ClickHouseDataType, Class<?>> typeHintMapping) {
this.log = log == null ? NOPLogger.NOP_LOGGER : log;
this.timeZone = timeZone;
this.input = input;
Expand All @@ -78,6 +81,20 @@ public class BinaryStreamReader {

this.arrayDefaultTypeHint = typeHintMapping == null ||
typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array);
this.stringAsBinaryDefault = typeHintMapping != null &&
typeHintMapping.get(ClickHouseDataType.String) == StringValue.class;
}

private boolean readStringAsBinary(Class<?> typeHint) {
if (typeHint != null) {
if (typeHint == StringValue.class) {
return true;
}
if (typeHint == String.class) {
return false;
}
}
return stringAsBinaryDefault;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FixedString type hint ignored

Medium Severity

readStringAsBinary only derives its default from typeHintMapping when ClickHouseDataType.String is set to StringValue.class. Record reads always pass a null per-column hint, so a mapping that lists only FixedStringStringValue never enables binary reads and those columns stay UTF-8 String values.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 3012f42. Configure here.

}

/**
Expand Down Expand Up @@ -121,12 +138,18 @@ public <T> T readValue(ClickHouseColumn column, Class<?> typeHint) throws IOExce
switch (dataType) {
// Primitives
case FixedString: {
if (readStringAsBinary(typeHint)) {
return (T) new StringValue(readStringBytes(input, precision));
}
byte[] bytes = precision > STRING_BUFF.length ?
new byte[precision] : STRING_BUFF;
readNBytes(input, bytes, 0, precision);
return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8);
}
case String: {
if (readStringAsBinary(typeHint)) {
return (T) readStringValue();
}
return (T) readString();
}
case Int8:
Expand Down Expand Up @@ -1119,17 +1142,41 @@ public String readString() throws IOException {
}

/**
* Reads a decimal value from input stream.
* Reads a string from the internal input stream preserving the raw bytes as a {@link StringValue}.
* Unlike {@link #readString()} this does not decode bytes into a {@link String} and never reuses the
* shared buffer, so the value is safe to keep after the next read.
*
* @return string value holding the raw bytes
* @throws IOException when IO error occurs
*/
public StringValue readStringValue() throws IOException {
return new StringValue(readStringBytes(input, readVarInt(input)));
}

/**
* Reads the raw bytes of a string from the input stream given its length.
*
* @param input - source of bytes
* @return String
* @param len - number of bytes to read
* @return byte[] containing the raw string bytes
* @throws IOException when IO error occurs
*/
public static String readString(InputStream input) throws IOException {
int len = readVarInt(input);
public static byte[] readStringBytes(InputStream input, int len) throws IOException {
if (len == 0) {
return "";
return new byte[0];
}
return new String(readNBytes(input, len), StandardCharsets.UTF_8);
return readNBytes(input, len);
}

/**
* Reads a string value from input stream.
* @param input - source of bytes
* @return String
* @throws IOException when IO error occurs
*/
public static String readString(InputStream input) throws IOException {
byte[] bytes = readStringBytes(input, readVarInt(input));
return bytes.length == 0 ? "" : new String(bytes, StandardCharsets.UTF_8);
}

public static int readByteOrEOF(InputStream input) throws IOException {
Expand Down
Loading
Loading