# Source code for deephaven.experimental.data_index
#
# Copyright (c) 2016-2025 Deephaven Data Labs and Patent Pending
#
"""This module provides the ability to create, check, and retrieve DataIndex objects from Deephaven tables."""
from typing import List, Optional
import jpy
from deephaven import DHError
from deephaven._wrapper import JObjectWrapper
from deephaven.jcompat import j_list_to_list
from deephaven.table import Table
# Java DataIndexer type resolved through jpy. The functions in this module call hasDataIndex, getDataIndex and
# getOrCreateDataIndex directly on it.
_JDataIndexer = jpy.get_type("io.deephaven.engine.table.impl.indexer.DataIndexer")
# Java DataIndex type resolved through jpy. It is used as the j_object_type of the DataIndex wrapper class.
_JDataIndex = jpy.get_type("io.deephaven.engine.table.DataIndex")
class DataIndex(JObjectWrapper):
    """An index over one or more key columns of a Deephaven table, used to improve the speed of data access
    operations on that table.

    A DataIndex is itself backed by a table, which is available through the ``table`` property."""

    j_object_type = _JDataIndex

    def __init__(self, j_data_index: jpy.JType):
        """Wraps the given Java DataIndex object.

        Args:
            j_data_index (jpy.JType): the Java DataIndex object to wrap
        """
        self._j_data_index = j_data_index

    @property
    def j_object(self) -> jpy.JType:
        """The wrapped Java DataIndex object."""
        return self._j_data_index

    @property
    def keys(self) -> List[str]:
        """The names of the columns indexed by the DataIndex."""
        j_key_names = self._j_data_index.keyColumnNames()
        return j_list_to_list(j_key_names)

    @property
    def table(self) -> Table:
        """The backing table of the DataIndex."""
        j_backing_table = self._j_data_index.table()
        return Table(j_backing_table)
def has_data_index(table: Table, key_cols: List[str]) -> bool:
    """Reports whether a table currently has a DataIndex for the given key columns.

    Args:
        table (Table): the table to inspect
        key_cols (List[str]): the names of the indexed key columns

    Returns:
        bool: True if the table has a DataIndex for the key columns, False otherwise
    """
    j_table = table.j_table
    return _JDataIndexer.hasDataIndex(j_table, key_cols)
def _get_data_index(table: Table, key_cols: List[str]) -> Optional[DataIndex]:
    """Gets the existing DataIndex for the given key columns without creating one.

    Args:
        table (Table): the table to get the DataIndex from
        key_cols (List[str]): the names of the key columns indexed

    Returns:
        a DataIndex wrapping the Java index, or None if the lookup yields no index
    """
    j_di = _JDataIndexer.getDataIndex(table.j_table, key_cols)
    # Test for None explicitly: absence of an index is signalled by None, and the outcome should not depend on
    # how the wrapped Java object happens to evaluate in a boolean context.
    if j_di is None:
        return None
    return DataIndex(j_di)
def data_index(table: Table, key_cols: List[str], create_if_absent: bool = True) -> Optional[DataIndex]:
    """Gets the DataIndex for the given key columns on the provided table.

    When the DataIndex already exists, it is returned. When it does not exist and create_if_absent is True, the
    DataIndex is created first and then returned; otherwise None is returned.

    Args:
        table (Table): the table to index
        key_cols (List[str]): the names of the key columns to index
        create_if_absent (bool): if True, create the DataIndex if it does not already exist, default is True

    Returns:
        a DataIndex or None

    Raises:
        DHError: if looking up or creating the DataIndex fails
    """
    if not create_if_absent:
        # Lookup-only path: nothing is created here, so a failure is reported as a failed lookup rather than
        # as a failed creation.
        try:
            return _get_data_index(table, key_cols)
        except Exception as e:
            raise DHError(e, "failed to get DataIndex.") from e

    try:
        return DataIndex(_JDataIndexer.getOrCreateDataIndex(table.j_table, key_cols))
    except Exception as e:
        raise DHError(e, "failed to create DataIndex.") from e