# Source code for deephaven.learn.gather
#
# Copyright (c) 2016-2026 Deephaven Data Labs and Patent Pending
#
"""Utilities for gathering Deephaven table data into Python objects"""
import enum
import jpy
import numpy as np
from deephaven import DHError
_JGatherer = jpy.get_type("io.deephaven.integrations.learn.gather.NumPy")
class MemoryLayout(enum.Enum):
    """Memory layouts for an array.

    Every member's value is a boolean: ``True`` for the row-major layouts and
    ``False`` for the column-major layouts. ``C`` and ``FORTRAN`` reuse the
    values of ``ROW_MAJOR`` and ``COLUMN_MAJOR`` respectively, so they are
    enum aliases of those members rather than distinct members.
    """

    ROW_MAJOR = True
    """ Row-major memory layout."""
    COLUMN_MAJOR = False
    """ Column-major memory layout."""
    C = True
    """ Memory layout consistent with C arrays (row-major)."""
    FORTRAN = False
    """ Memory layout consistent with Fortran arrays (column-major)."""

    def __init__(self, is_row_major: bool) -> None:
        # Enum hands the member's value to __init__; expose it under a
        # descriptive attribute name so callers need not read ``.value``.
        self.is_row_major = is_row_major
def _convert_to_numpy_dtype(np_type: type) -> type:
    """Converts an input type to the corresponding NumPy data type.

    Args:
        np_type: a type defined in the ``numpy`` module, or one of the Python
            built-in types ``bool``, ``float`` or ``int``

    Returns:
        np_type itself when its ``__module__`` is ``numpy``; otherwise
        ``np.bool_`` for ``bool``, ``np.double`` for ``float`` and ``np.intc``
        for ``int``

    Raises:
        ValueError: if np_type is none of the above
    """
    # Use getattr with a default so that an object exposing no ``__module__``
    # attribute reaches the ValueError below instead of an AttributeError.
    if getattr(np_type, "__module__", None) == np.__name__:
        return np_type

    # Identity checks only: subclasses of the built-ins are deliberately not
    # treated as the built-in itself.
    for builtin_type, numpy_type in (
        (bool, np.bool_),
        (float, np.double),
        (int, np.intc),
    ):
        if np_type is builtin_type:
            return numpy_type

    raise ValueError(
        f"{np_type} is not a data type that can be converted to a NumPy dtype."
    )
def table_to_numpy_2d(
    row_set,
    col_set,
    order: MemoryLayout = MemoryLayout.ROW_MAJOR,
    np_type: type = np.intc,
) -> np.ndarray:
    """Converts Deephaven table data to a 2d NumPy array of the appropriate size.

    Args:
        row_set: a RowSequence describing the number of rows in the table;
            its ``intSize()`` is used as the row count of the result
        col_set: ColumnSources describing which columns to copy; its length
            is used as the column count of the result
        order (MemoryLayout): the desired memory layout of the output array
        np_type: the desired NumPy data type of the output NumPy array.
            Supported element types are 8/16/32/64-bit signed integers and
            32/64-bit floats; the Python built-ins ``int`` and ``float`` are
            converted to NumPy types first.

    Returns:
        a np.ndarray with ``row_set.intSize()`` rows and ``len(col_set)``
        columns

    Raises:
        DHError: if the data type is not supported or the conversion fails
            for any other reason
    """
    try:
        np_type = _convert_to_numpy_dtype(np_type)
        # Compare normalized dtypes rather than scalar type objects, so that
        # equivalent NumPy types select the same buffer.
        dtype = np.dtype(np_type)
        is_row_major = order.is_row_major

        # Fixed-width dtypes matching the Java primitive behind each gatherer
        # method. ``np.int_`` is not used for ``long`` because its width is
        # platform-dependent, while a Java long is always 64 bits.
        gatherer_by_dtype = (
            (np.dtype(np.int8), "tensorBuffer2DByte"),
            (np.dtype(np.int16), "tensorBuffer2DShort"),
            (np.dtype(np.int32), "tensorBuffer2DInt"),
            (np.dtype(np.int64), "tensorBuffer2DLong"),
            (np.dtype(np.float32), "tensorBuffer2DFloat"),
            (np.dtype(np.float64), "tensorBuffer2DDouble"),
        )
        for supported_dtype, method_name in gatherer_by_dtype:
            if dtype == supported_dtype:
                gather = getattr(_JGatherer, method_name)
                break
        else:
            raise ValueError(f"Data type {np_type} is not supported.")

        buffer = gather(row_set, col_set, is_row_major)
        tensor: np.ndarray = np.frombuffer(buffer, dtype=dtype)

        n_rows = row_set.intSize()
        n_cols = len(col_set)
        # reshape returns a view of the flat buffer; it is used instead of
        # assigning to ``tensor.shape``.
        if is_row_major:
            # The flat buffer is read as (columns, rows) and transposed, so
            # the result is indexed as (rows, columns).
            return tensor.reshape(n_cols, n_rows).T
        return tensor.reshape(n_rows, n_cols)
    except Exception as e:
        raise DHError(
            e,
            f"failed to convert rows: {row_set} and cols: {col_set} to a 2D NumPy array",
        ) from e