# Source code for deephaven.experimental.data_index

#
# Copyright (c) 2016-2025 Deephaven Data Labs and Patent Pending
#
"""This module provides the ability to create, check, and retrieve DataIndex objects from Deephaven tables."""

from collections.abc import Sequence
from typing import Optional

import jpy

from deephaven import DHError
from deephaven._wrapper import JObjectWrapper
from deephaven.jcompat import j_list_to_list
from deephaven.table import Table

_JDataIndexer = jpy.get_type("io.deephaven.engine.table.impl.indexer.DataIndexer")
_JDataIndex = jpy.get_type("io.deephaven.engine.table.DataIndex")


class DataIndex(JObjectWrapper):
    """An index that speeds up data access operations on a Deephaven table.

    A DataIndex covers one or more indexed (key) columns of a Deephaven table
    and is itself backed by a table. This class is a thin Python wrapper around
    the underlying Java DataIndex object.
    """

    j_object_type = _JDataIndex

    def __init__(self, j_data_index: jpy.JType):
        """Wraps the given Java DataIndex object.

        Args:
            j_data_index (jpy.JType): the Java DataIndex object to wrap
        """
        self._j_data_index = j_data_index

    @property
    def j_object(self) -> jpy.JType:
        """The wrapped Java DataIndex object."""
        return self._j_data_index

    @property
    def keys(self) -> list[str]:
        """The names of the columns indexed by the DataIndex."""
        j_key_names = self._j_data_index.keyColumnNames()
        return j_list_to_list(j_key_names)

    @property
    def table(self) -> Table:
        """The backing table of the DataIndex."""
        j_backing_table = self._j_data_index.table()
        return Table(j_backing_table)
def has_data_index(table: Table, key_cols: Sequence[str]) -> bool:
    """Reports whether a DataIndex currently exists on a table for the given key columns.

    Args:
        table (Table): the table to check
        key_cols (Sequence[str]): the names of the key columns indexed

    Returns:
        bool: True if the table has a DataIndex, False otherwise
    """
    j_table = table.j_table
    return _JDataIndexer.hasDataIndex(j_table, key_cols)
def _get_data_index(table: Table, key_cols: Sequence[str]) -> Optional[DataIndex]:
    """Looks up the existing DataIndex for the given key columns without creating one.

    Args:
        table (Table): the table to get the DataIndex from
        key_cols (Sequence[str]): the names of the key columns indexed

    Returns:
        a DataIndex, or None if the DataIndex does not exist
    """
    j_existing = _JDataIndexer.getDataIndex(table.j_table, key_cols)
    # A falsy result from the Java lookup means there is nothing to wrap.
    if not j_existing:
        return None
    return DataIndex(j_existing)
def data_index(
    table: Table, key_cols: Sequence[str], create_if_absent: bool = True
) -> Optional[DataIndex]:
    """Returns the DataIndex for the given key columns on the provided table.

    An existing DataIndex is returned as is. If none exists yet, the outcome
    depends on create_if_absent: when True, the DataIndex is created and
    returned; when False, None is returned.

    Args:
        table (Table): the table to index
        key_cols (Sequence[str]): the names of the key columns to index
        create_if_absent (bool): if True, create the DataIndex if it does not
            already exist, default is True

    Returns:
        a DataIndex or None

    Raises:
        DHError
    """
    try:
        if create_if_absent:
            j_index = _JDataIndexer.getOrCreateDataIndex(table.j_table, key_cols)
            return DataIndex(j_index)
        # Lookup-only path: never creates, may yield None.
        return _get_data_index(table, key_cols)
    except Exception as e:
        raise DHError(e, "failed to create DataIndex.") from e