# ACCESS DATA

## Sourcing tables
# NEEDED: The named tables in these two lines are used throughout
# the pages of examples below.
# Note: the table name left of the "=" sign is a variable named whatever you want.
allAvailable = db.getCatalog()  # full catalog of namespaces and tables
staticSource1 = db.t("LearnDeephaven", "StockTrades")  # historical data
staticSource2 = db.t("LearnDeephaven", "EODTrades")  # historical EOD data
tickingSource = db.i("DbInternal", "ProcessEventLog")  # intraday data -- a real-time
# updating table! note: db.i (intraday) instead of db.t (historical)

from deephaven import *  # accesses a necessary library for the below
snapshotEx = db.i("DbInternal", "ProcessEventLog", False)  # intraday data w/o ticking

## Merging tables
copy1 = staticSource1.where()  # 3.16 mm records
copy2 = staticSource1.where()  # same 3.16 mm records in a table of a different name
mergeBothCopies = ttools.merge(copy1, copy2)
# to merge tables, the schema must be identical:
# same column names, same column data types

# merging updating (ticking) tables follows the same standard
copyUpdating1 = tickingSource.where()
copyUpdating2 = tickingSource.where()
# renamed from mergeBothCopies so the static example above is not silently overwritten
mergeBothUpdatingCopies = ttools.merge(copyUpdating1, copyUpdating2)
# same schema for each of the tables to be merged

# one can merge static and ticking tables
staticSource1v2 = staticSource1.where().view("Date", "Timestamp", "SourceRowNum = i")
tickingSourcev2 = tickingSource.where().view("Date", "Timestamp", "SourceRowNum = i")
mergeStaticAndUpdating = ttools.merge(staticSource1v2, tickingSourcev2)
# again, same schema for each of the tables to be merged
# in the UI right-click on the table heading, then
# hit Reverse/Before-sorting from the menu -- updates at top

## Viewing metadata of a table
seeMetadata = staticSource1.getMeta()  # column names, data types, partitions, groups

## Printing information
print(db.getNamespaces())  # just the namespaces, not also the tables
print(db.getIntradayPartitions("DbInternal", "ProcessEventLog"))  # see partitions

from deephaven.Calendars import calendarNames
print(calendarNames())  # list of business calendars shipped with Deephaven

help('deephaven')  # describes the packages available in the main deephaven module

# FILTER
# VERY IMPORTANT: Backticks (`) are used for strings and single quotes (') are used for times.
# VERY IMPORTANT: Most queries benefit by starting with filters.
# Less data generally means better performance.
# For SQL developers: In Deephaven, joins are not a primary operation for filtering.
# Use where(), whereIn(), and whereNotIn().

## Date & Time examples
todaysData1 = tickingSource.where("Date = currentDay()")  # Date for this is a String
todaysData2 = tickingSource.where("Date = currentDateNy()")  # popular for NY
todaysData3 = tickingSource.where("Date = currentDate(TZ_SYD)")  # Sydney timezone

# VERY IMPORTANT: Filtering first by partitioning values (which is most often the set
# of Dates) is both a best practice, and necessary for some downstream use cases.
singleStringDate = staticSource1.where("Date = `2017-08-23`")  # HEAVILY USED!
lessThanDate = staticSource1.where("Date < `2017-08-23`")  # use >=, etc. as well
oneMonthString = staticSource1.where("Date.startsWith(`2017-08`)")
singleDbDate = staticSource2.where("formatDate(EODTimestamp, TZ_NY) = `2017-03-01`")
afterDbDatetime = staticSource1.where("Timestamp > '2017-08-25T10:30:00 NY'")
justBizTime = staticSource1.where("isBusinessTime(Timestamp)")  # HEAVILY USED!
# More Date & Time filters
justAugustDatetime = staticSource2.where("monthOfYear(EODTimestamp, TZ_NY) = 8")
just10amHour = staticSource1.where("hourOfDay(Timestamp, TZ_NY) = 10")
justTues = staticSource2.where("dayOfWeek(EODTimestamp).getValue() = 2")  # Tuesdays

## String examples
oneStringMatch = staticSource1.where("USym = `AAPL`")  # match filter
stringSetMatch = staticSource1.where("USym in `AAPL`, `MSFT`, `GOOG`")
caseInsensitive = staticSource1.where("USym icase in `aapl`, `msft`, `goog`")
notInExample = staticSource1.where("USym not in `AAPL`, `MSFT`, `GOOG`")  # see "not"
containsExample = staticSource1.where("USym.contains(`I`)")
notContainsExample = staticSource1.where("!USym.contains(`I`)")
startsWithExample = staticSource1.where("USym.startsWith(`A`)")
endsWithExample = staticSource1.where("USym.endsWith(`M`)")

## Number examples
equalsExample = staticSource2.where("round(Open) = 44")
lessThanExample = staticSource2.where("High < 8.42")
someManipulation = staticSource2.where("(Close - Open) / Open > 0.05")
modExample1 = staticSource2.where("i % 10 = 0")  # every 10th row
modExample2 = staticSource2.where("Ticker.length() % 2 = 0")  # even char-count Tickers

## Multiple filters
conjunctiveComma = staticSource1.where("Date = `2017-08-23`", "USym = `AAPL`")  # HEAVILY USED!
conjunctiveAmpA = staticSource1.where("Date = `2017-08-23` && USym = `AAPL`")
# note: a stray line-continuation backslash inside this query string has been removed
disjunctiveSameCol = staticSource1.where("Date = `2017-08-23` || Date = `2017-08-25`")
# thisDate or thatDate
disjunctiveDiffCol = staticSource1.where("Date = `2017-08-23` || USym = `AAPL`")
# thisDate or thatUSym
rangeLameWay = staticSource1.where("Date >= `2017-08-21`", "Date <= `2017-08-23`")
inRangeBest = staticSource1.where("inRange(Date, `2017-08-21`, `2017-08-23`)")  # HEAVILY USED!
inRangeBestString = staticSource1.where("inRange(USym.substring(0,1), `A`, `D`)")
# starts with letters A - D

## WhereIn / WhereNotIn
# run these two queries together
usymSetDriver = staticSource1.renameColumns("Ticker = USym")
whereInExample = staticSource2.whereIn(usymSetDriver, "Ticker")  # HEAVILY USED!
# filters staticSource2 USyms to be the same as
# the set of USyms in staticSource1, i.e.
# the same as in usymSetDriver
# this can be dynamic to accommodate changing usymSetDriver data

# renamed so the example above is not overwritten; also removed the stray
# backslash-before-comment that made this line a SyntaxError
whereInDiffColName = staticSource2.whereIn(staticSource1, "Ticker = USym")
# Ticker in staticSource2 within list of USym of staticSource1
whereNotInExample = staticSource2.whereNotIn(staticSource1, "Ticker = USym")
# see the "not"

## Nulls and NaNs
nullExample = staticSource1.where("isNull(ExchangeId)")  # all data types supported
notNullExample = staticSource1.where("!isNull(ExchangeId)")
nanExample = staticSource1.where("isNaN(Last)")
notNanExample = staticSource1.where("!isNaN(Last)")
neitherNanNull = staticSource1.where("isNormal(Last)")  # normal = not null nor NaN
eitherNanNull = staticSource1.where("!isNormal(Last)")  # not normal

## Head and Tail
first10k = staticSource2.head(10_000)
last10k = staticSource2.tail(10_000)  # note: this would deliver a changing set
# if run against intraday (updating) data
first15Perc = staticSource2.headPct(0.15)  # the first 15% of rows
last15Perc = staticSource2.tailPct(0.15)  # the last 15% of rows
first20byKey = staticSource1.headBy(20, "USym")  # first 20 rows for each USym
last20byKey = staticSource1.tailBy(20, "USym")  # last 20 rows for each USym

# SORT
oneSortTime = staticSource1.sort("Timestamp")  # works for all data types, actually
oneSortNumber = staticSource1.sort("Last")  # sorting a number
oneSortString = staticSource1.sort("Exchange")  # sorting a string
sortDescExample = staticSource1.sortDescending("Timestamp")  # highest to lowest
multiSortClassic = staticSource1.sort("Exchange", "Timestamp")  # Exchange then Time

# run these below together
SortPair = jpy.get_type("com.illumon.iris.db.tables.SortPair")
sortPairs = staticSource1.sort(SortPair.ascending("Exchange"),
                               SortPair.descending("Timestamp"))
# Exchange ascending then Timestamp descending

reverseTable = staticSource1.reverse()  # HEAVILY USED! Very cheap to support GUIs
reverseUpdating = tickingSource.reverse()  # updates then come to the top of table

# SELECT AND CREATE NEW COLUMNS
# VERY IMPORTANT: Use select() and update() when (i) expensive to calculate or
# (ii) accessed frequently.

## Version 1: Choose and add new columns -- Calculate and write to memory
selectColumns = staticSource2.select("EODTimestamp", "Ticker", "Open", "Close")
# constrain to only those 4 columns, write to memory
selectAddCol = staticSource2.select("EODTimestamp", "Ticker", "Open", "Close",
                                    "Change = Close - Open")
# constrain and add a new calculated column
selectAndUpdateCol = staticSource2.select("EODTimestamp", "Ticker", "Open",
                                          "Close").update("Change = Close - Open")
# add a new calculated column - logically equivalent to previous example

# VERY IMPORTANT: Use view() and updateView() when (i) formula is quick or
# (ii) only a portion of the data is used at a time. Minimizes RAM used.

## Version 2: Choose and add new columns -- Reference a formula and calc on the fly
viewColumns = staticSource2.view("EODTimestamp", "Ticker", "Open", "Close")
# similar to select(), though uses on-demand formula
# NOTE(review): the comment below describes a view-style column set, but the call is
# updateView (which keeps all other columns) -- confirm whether view() was intended
viewAddCol = staticSource2.updateView("EODTimestamp", "Ticker", "Open", "Close",
                                      "Change = Close - Open")
# view set and add a column, though with an on-demand formula
viewAndUpdateViewCol = staticSource2.view("EODTimestamp", "Ticker", "Open",
                                          "Close").updateView("Change = Close - Open")
# logically equivalent to previous example

# VERY IMPORTANT: Use lazyUpdate() when there are a small-ish number of unique values;
# on-demand formula results are stored in cache and re-used.
## Version 3: Add new columns -- Reference a formula and calc on the fly
lazyUpdateEx = staticSource2.lazyUpdate("EODTimestamp", "Ticker", "Open", "Close",
                                        "Change = Close - Open")
# similar to update(), though uses on-demand formula

## Things one can do within updateView (or equivalently update, select, view) methods:

### GET THE ROW NUMBER
getTheRowNumber = staticSource2.updateView("RowNum = i")

### DO SOME MATH
simpleMath = staticSource2.updateView(
    "RandomNumber = Math.random()",
    "RandomInt100 = new Random().nextInt(100)",
    "Arithmetic = Close * Volume",
    "SigNum = signum(RandomNumber - 0.5)",
    "Signed = SigNum * Arithmetic",
    "AbsDlrVolume = abs(Signed)",
    "Power = Math.pow(i, 2)",
    "Exp = Close * 1E2",
    "Log = Close * log(2.0)",
    "Round = round(Close)",
    "Floor = floor(Close)",
    "Mod = RandomInt100 % 5",
    "CastInt = (int)AbsDlrVolume",
    "CastLong = (long)AbsDlrVolume")

### HANDLE ARRAYS
arrayExamples = staticSource2.updateView(
    "RowNumber = i",
    "PrevRowReference = Close_[i-1]",
    "MultipleRowRef = Close_[i-2] - Close_[i-1]",
    "SubArray = Close_.subArray(i-2, i+1)",
    "SumSubArray = sum(Close_.subArray(i-2, i+1))",
    "ArraySize = SubArray.size()",
    "ArrayMin = min(SubArray)",
    "ArrayMax = max(SubArray)",
    "ArrayMedian = median(SubArray)",
    "ArrayAvg = avg(SubArray)",
    "ArrayStd = std(SubArray)",
    "ArrayVar = var(SubArray)",
    "ArrayLast = last(SubArray)",
    "ArrayIndex = SubArray[1]",
    "InArray = in(45.71, SubArray)")

### CALCULATE PERCENTILE IN A SERIES
viewAndSortPrices = staticSource2.view("Date = formatDate(EODTimestamp, TZ_NY)",
                                       "Ticker",
                                       "ClosingPrice = Close")\
    .sort("Ticker", "ClosingPrice")
closingPriceArraysByTicker = viewAndSortPrices.by("Ticker")
# join the two tables above using natural join
priceAndArray = viewAndSortPrices\
    .naturalJoin(closingPriceArraysByTicker, "Ticker", "PriceArray = ClosingPrice")\
    .sort("Ticker", "Date")
# use binSearch to drive the percentage calculation
percRank = priceAndArray.updateView(
    "ArraySize = PriceArray.size()",
    "BinRank = binSearchIndex(PriceArray, ClosingPrice, BS_LOWEST)",
    "PercRank = BinRank / ArraySize",
    "Perc25 = percentile(PriceArray, 0.25)",
    "Median = median(PriceArray)",
    "Perc75 = percentile(PriceArray, 0.75)")

### MANIPULATE TIME AND CALENDARS
timeStuff = staticSource2.updateView(
    "CurrentTime = currentTime()",
    "CurrentDateDefault = currentDay()",
    "CurrentDateNy = currentDateNy()",
    "CurrentDateLon = currentDate(TZ_LON)",
    "LastBizDateNy = lastBusinessDateNy()",
    "IsAfter = CurrentTime > EODTimestamp",
    "IsBizDay = isBusinessDay(CurrentDateLon)",
    "StringDT = format(EODTimestamp, TZ_NY)",
    "StringDate = formatDate(EODTimestamp, TZ_NY)",
    "StringToTime = convertDateTime(StringDate + `T12:00 NY`)",
    "AddTime = EODTimestamp + '05:11:04.332'",
    "PlusHour = EODTimestamp + HOUR",
    "LessTenMins = EODTimestamp - 10 * MINUTE",
    "DiffYear = diffYear(EODTimestamp, currentTime())",
    "DiffDay = diffDay(EODTimestamp, CurrentTime)",
    "DiffNanos = PlusHour - EODTimestamp",
    "DayWeek = dayOfWeek(EODTimestamp, TZ_NY)",
    "HourDay = hourOfDay(EODTimestamp, TZ_NY)",
    "DateAtMidnight = dateAtMidnight(EODTimestamp, TZ_NY)",
    "IsBizDayString = isBusinessDay(StringDate)",
    "IsBizDayDatetime = isBusinessDay(EODTimestamp)",
    # fixed: originally duplicated isBusinessDay(EODTimestamp) from the line above,
    # contradicting this column's name
    "IsBizTime = isBusinessTime(EODTimestamp)",
    "FracBizDayDone = fractionOfBusinessDayComplete(currentTime())",
    "FracBizDayOpen = fractionOfBusinessDayRemaining(currentTime())",
    "NextNonBizDay = nextNonBusinessDay()",
    "NextBizDayCurrent = nextBusinessDay()",
    "NextBizDayString = nextBusinessDay(StringDate)",
    "NextBizDayDatetime = nextBusinessDay(EODTimestamp)",
    "PlusFiveBizDayCurrent = nextBusinessDay(5)",
    "PlusFBizDayString = nextBusinessDay(StringDate, 5)",
    "PlusFBizDayDatetime = nextBusinessDay(EODTimestamp, 5)",
    "PrevBizDayCurrent = previousBusinessDay()",
    "PrevBizDayString = previousBusinessDay(StringDate)",
    "PrevBizDayDatetime = previousBusinessDay(EODTimestamp)",
    "MinusFiveBizDayCurrent = previousBusinessDay(5)",
    "MinusFiveBizDayString = previousBusinessDay(StringDate, 5)",
    "MinusFiveBizDayDatetime = previousBusinessDay(EODTimestamp, 5)",
    "BizDayArray = businessDaysInRange(EODTimestamp, currentTime())",
    "NonBizDayArray = nonBusinessDaysInRange(EODTimestamp, currentTime())",
    "DiffBizDaysCount = numberOfBusinessDays(EODTimestamp, currentTime())",
    "DiffBizDaysExact = diffBusinessDay(EODTimestamp, currentTime())",
    "DiffBizDaysString = numberOfBusinessDays(MinusFiveBizDayString, StringDate)",
    "StandardBizDayNanos = standardBusinessDayLengthNanos()",
    "DiffBizSecs = diffBusinessNanos(CurrentTime, CurrentTime + 5 * DAY) / 1E9",
    "LastBizOfMonth = isLastBusinessDayOfMonth(StringDate)",
    "LastBizOfWeek = isLastBusinessDayOfWeek(currentTime())")

### BIN DATA
bins = staticSource1.updateView("PriceBin = upperBin(Last, 20)",
                                "SizeBin = lowerBin(Size, 100)",
                                "TimeBin = upperBin(ExchangeTimestamp, '00:15:00')")
aggBin = bins.view("TimeBin", "Dollars = Last * Size").sumBy("TimeBin")
# binning is a great pre-step for aggregating to support
# the down-sampling or other profiling of data

### MANIPULATE STRINGS
stringStuff = staticSource2.view(
    "StringDate = formatDate(EODTimestamp, TZ_NY)",
    "Ticker", "Close",
    "NewString = `new_string_example_`",
    "ConcatV1 = NewString + Ticker",
    "ConcatV2 = NewString + `Note_backticks!!`",
    "ConcatV3 = NewString.concat(Ticker)",
    "ConcatV4 = NewString.concat(`Note_backticks!!`)",
    "StartBool = Ticker.startsWith(`M`)",
    "NoEndBool = !Ticker.endsWith(`OG`)",
    "ContainedBool = Ticker.contains(`AA`)",
    "NoContainBool = !Ticker.contains(`AA`)",
    "FirstChar = Ticker.substring(0,1)",
    "LengthString = Ticker.length()",
    "CharIndexPos = ConcatV1.charAt(19)",
    "SubstringEx = ConcatV1.substring(11,20)",
    "FindIt = NewString.indexOf(`_`)",
    "FindItMiddle = NewString.indexOf(`_`, FindIt + 1)",
    "FindLastOf = NewString.lastIndexOf(`_`)",
    "SplitToArrays = NewString.split(`_`)",
    "SplitWithMax = NewString.split(`_`, 2)",
    "SplitIndexPos = NewString.split(`_`)[1]",
    "LowerCase = Ticker.toLowerCase()",
    "UpperCase = NewString.toUpperCase()",
    "DoubleToStringv1 = Close + ``",
    "DoubleToStringv2 = String.valueOf(Close)",
    "DoubleToStringv3 = Double.toString(Close)",
    "StringToDoublev1 = Double.valueOf(DoubleToStringv1)")

### USE TERNARIES; IF-THENS
ifThenexample = staticSource2.updateView(
    "SimpleTernary = Close < 100 ? `smaller` : `bigger`",
    "TwoLayers = Close <= 10 ? `small` : Close < 100 ? `medium` : `large`",
    "Conj = Close < 100 && Ticker.contains(`A`) ? `special` : `boring`",
    "Disj = Close < 100 || Ticker.contains(`A`) ? `fave` : `other`",
    "PrevClose = Ticker_[i-1] = Ticker ? Close_[i-1] : NULL_DOUBLE")

### CREATE AND USE A CUSTOM FUNCTION
def mult(a, b):
    """Return the product a * b (demo of calling a Python function from a query)."""
    return a * b

pyFuncExample = staticSource2.update("M = (double)mult.call(Close, Volume)")

# MANIPULATE COLUMNS
uniqueValues = staticSource2.selectDistinct("Ticker")  # show unique set
# works on all data types - careful with doubles, longs
uniqueValuesCombo = staticSource2.selectDistinct("EODTimestamp", "Ticker")
# unique combinations of EODTimestamp with Ticker
renameStuff = staticSource2.renameColumns("USym = Ticker", "ClosingPrice = Close")
dropColumn = staticSource2.dropColumns("Volume")  # same method for one or many
dropColumns = staticSource2.dropColumns("ImportDate", "Open", "Volume")
putColsAtStart = staticSource2.moveUpColumns("Ticker", "Volume")  # makes 1st col(s)
putColsWherever = staticSource2.moveColumns(1, "Ticker", "Volume")
# makes Ticker the 2nd and Volume the 3rd column
colDescription = staticSource1.withColumnDescription("USym", "Underlying Symbol")
# when user hovers on column header the description is visible

# GROUP AND AGGREGATE

## Simple Grouping
groupToArrays1 = staticSource1.by("USym")
# one row per key (i.e., USym), all other columns are arrays
multipleKeys = staticSource1.by("USym", "Exchange")
# one row for each key-combination (i.e. USym-Exchange pairs)

## Un-Grouping
# Run these lines together
aggByKey = staticSource1.by("Date")
# one row per Date, other fields are arrays from staticSource1
ungroupThatOutput = aggByKey.ungroup()  # no arguments usually
# each array value becomes its own row
# in this case turns aggByKey back into staticSource1

## Aggregations
# IMPORTANT: Any column not in the parentheses of the whateverBy("Col1", "Col2")
# statement needs to be an ok type for that aggregation method -- i.e. sums need
# all non-key columns to be numbers.
firstByKey = staticSource1.firstBy("Date")
firstByTwoKeys = staticSource1.firstBy("Date", "USym")  # all below work with multi
countOfEntireTable = staticSource1.countBy("USym")  # single arg returns tot count
countOfGroup = staticSource1.countBy("N", "USym")  # N is the count column's name
firstOfGroup = staticSource1.firstBy("USym")
lastOfGroup = staticSource1.lastBy("USym")
sumOfGroup = staticSource1.view("USym", "Last").sumBy("USym")
# non-key field must be numerical
avgOfGroup = staticSource1.view("USym", "Last").avgBy("USym")
stdOfGroup = staticSource1.view("USym", "Last").stdBy("USym")
varOfGroup = staticSource1.view("USym", "Last").varBy("USym")
medianOfGroup = staticSource1.view("USym", "Last").medianBy("USym")
minOfGroup = staticSource1.view("USym", "Last").minBy("USym")
maxOfGroup = staticSource1.view("USym", "Last").maxBy("USym")

## Combined Aggregations
# combine aggregations in a single method (using the same key-grouping)
from deephaven import *
combinationAgg = staticSource1.updateView("Dollars = Last * Size")\
    .by(caf.AggCombo(
        caf.AggLast("LastPrice = Last", "LastTimestamp = Timestamp"),
        caf.AggCount("Size"),
        caf.AggSum("Shares = Size", "Dollars"),
        caf.AggFirst("OpenPrice = Last"),
        caf.AggMax("HighPrice = Last"),
        caf.AggMin("LowPrice = Last"),
        caf.AggAvg("AvgPrice = Last"),
        caf.AggWAvg("Size", "WtdAvgPrice = Last"),
        caf.AggVar("VarPrice = Last"),
        caf.AggStd("StdPrice = Last"),
        caf.AggMed("MedianPrice = Last"),
        caf.AggPct(0.75, "Perc75Price = Last"),
    ), "Date", "USym")
# see the two keys -- Date & USym, so agg is within each unique pair
# note the ability to rename columns during the process
# observe the updateView of Dollars then used in the aggregation
# weighted average is nice syntax

# JOIN DATA FROM MULTIPLE TABLES
# IMPORTANT: For SQL developers -> joins in Deephaven are used to extend result sets
# by joining data from other tables, not so much for filtering.

## Joins that get used a lot

# IMPORTANT: NATURAL JOIN
# Vital to know: The right table of the join needs to have only one match
# based on the key(s).

# run these together
# note: smart quotes around "Last"/"Timestamp" (a SyntaxError) have been fixed
lastPriceTable = staticSource1.view("USym", "Last", "Timestamp")\
    .lastBy("USym")\
    .renameColumns("LastPrice = Last", "LastTime = Timestamp")
# creates a 3-column table of USym + last record of the other 2 cols

joinWithLastPrice = staticSource1.naturalJoin(lastPriceTable, "USym")
# arguments are (rightTableOfJoin, "JoinKey(s)")
# will have same number of rows as staticSource1 (left table)
# brings all non-key columns from lastPriceTable to staticSource1
# HEAVILY USED!
# conceptually similar to Excel vlookup()

specifyColsFromR = staticSource1.naturalJoin(lastPriceTable, "USym", "LastPrice")
# nearly identical to joinWithLastPrice example
# args are (rightTableOfJoin, "JoinKey(s)", "Cols from R table")

renameColsOnJoin = staticSource1\
    .naturalJoin(lastPriceTable, "USym", "LP = LastPrice, LastTime")
# nearly identical to specifyColsFromR example
# can rename a column on the join -- sometimes this is necessary
# if there would be 2 cols of the same name
# Note the placement of quotes

keysOfDiffNames = staticSource2.view("Ticker", "Close")\
    .naturalJoin(lastPriceTable, "Ticker = USym")
# note that Ticker in the L table is mapped as the key to USym in R
# this result has many null records, as there are many Tickers
# without matching USyms

# IMPORTANT
# Multiple keys!
# run these together
lastPriceTwoKeys = staticSource1.view("Date", "USym", "Last")\
    .lastBy("Date", "USym")\
    .renameColumns("LastPrice = Last")
# creates a 3-column table of LastPrice for each Date-USym pair
natJoinTwoKeys = staticSource1\
    .naturalJoin(lastPriceTwoKeys, "Date, USym", "LastPrice")
# arguments are (rightTableOfJoin, "JoinKey1, JoinKey2", "Col(s)")
# Note the placement of quotes

# IMPORTANT: AJ (AS-OF JOIN)
# As-of joins are the time series joins vital to the capital markets' use cases
# it is often wise to make sure the right table is sorted (based on the key)
# aj is designed to find "the exact match" of the key or "the record just before"
# for timestamp aj-keys, this means that time or the record just before

# NEEDED:
# MUST RUN "justAapl" query below to support all other aj examples!
justAapl = staticSource1.where("USym = `AAPL`").view("Timestamp", "AaplPrice = Last")
# just the AAPL trade rows

ajTimeSeriesJoin = staticSource1.aj(justAapl, "Timestamp")
# joins by looking at Timestamp in staticSource1 (left table)
# and finding exact match or "most adjacent before" Timestamp
# in justAapl (right table)

rajTimeSeriesJoin = staticSource1.raj(justAapl, "Timestamp")
# note the 'r' in front of the aj
# reverse aj finds the matching timestamp or the one after

ajPickAndNameCols = staticSource1\
    .aj(justAapl, "Timestamp", "AaplPrice, AaplTime = Timestamp")
# Note the placement of quotes

ajKeysDiffNames = staticSource1.dropColumns("Timestamp")\
    .aj(justAapl, "ExchangeTimestamp = Timestamp")
# ExchangeTimestamp from left table drives the join with Timestamp
# from the right table

# the fundamental below is used for TCA, slippage, simulation, etc.
ajMultipleKeysDiffNames = staticSource1.aj(
    staticSource1.updateView("Time10min = Timestamp + 10 * MINUTE"),
    "Date, USym, Timestamp = Time10min",
    "Price10Min = Last")
# join a series on the same series 10 minutes later
# Note the three keys -- matches Date, then USym...
# then does a time-series join on the last key

## Joins that get used much less

# exact joins require precisely one match in the right table for each key
lastPriceAug23 = staticSource1.where("Date = `2017-08-23`").lastBy("USym")
lastPriceAug24 = staticSource1.where("Date = `2017-08-24`").lastBy("USym")\
    .whereIn(lastPriceAug23, "USym")  # filters for USyms in lastPriceAug23
exactJoinEx = lastPriceAug23.exactJoin(lastPriceAug24, "USym", "NextClose = Last")

# join will find all matches from left in the right table
# if a match is not found, that row is omitted from the result
# if the right table has multiple matches, then multiple rows are in the result
lastPriceSS1 = staticSource1.lastBy("USym").view("USym", "Last")
lastPriceSS2 = staticSource2.lastBy("Ticker")\
    .view("USym = Ticker", "Price = Close")
firstPriceSS2 = staticSource2.firstBy("Ticker")\
    .view("USym = Ticker", "Price = Close")
mergeFirstAndLastSS2 = ttools.merge(firstPriceSS2, lastPriceSS2)
# the table has two rows per USym
joinExample = lastPriceSS1.join(mergeFirstAndLastSS2, "USym", "Price")
# note that the resultant set has two rows per USym

# EMPLOY OTHER METHODS

## Use columns as arrays and cells as variables
mostActiveUSym = staticSource1.countBy("N", "USym").sortDescending("N")
getAColumn = mostActiveUSym.getColumn("USym").getDirect()
# this creates a java array of the column's values (USym, in this case)
getARecord = mostActiveUSym.getColumn("USym").getDirect()[1]
# indexing starts at 0
getACell = mostActiveUSym.getColumn("USym").get(0)
# when to use each of the last two is beyond this basic introduction
# mostly relates to when iterating over an entire column
print(getAColumn)
print(getARecord)
print(getACell)

## Read and write csv
# IMPORTANT: It is very easy to import CSVs via the Code Studio UI
# and to export any Table to a CSV via the Table Widget
# (in Code Studios and Dashboard Tables).
# CSVs imported from the client need to be done via Code Studio UI.
# CSVs imported from a server-side directory should be done via the script below.
# NOT working examples -- the below provides syntax, but the filepaths are unknown
# readCSV("filepath", "delimiter")  # read CSV file on query server into table
# readHeaderlessCsv("filepath", "delimiter")
# writeCsv(table, "filePath")  # writes a Deephaven Table out as a CSV

## Write a Table to a Namespace
# though the below uses a namespace called ExNamespace, it's best practice
# for teams to establish a naming protocol for these directories

# create some tables to use in the example
aapl100 = staticSource1.where("USym = `AAPL`").head(100)
goog100 = staticSource1.where("USym = `GOOG`").head(100)
aapl100_wDlr = staticSource1.where("USym = `AAPL`").tail(100)\
    .update("Dollars = Last * Size")

# add local table from Code Studio to Deephaven servers
db.addTable("ExNamespace", "ExTable", aapl100)
seeTableJustAdded = db.i("ExNamespace", "ExTable").where()
# append to that table
db.appendTable("ExNamespace", "ExTable", goog100)
nowSeeAppendedTable = db.i("ExNamespace", "ExTable").where()
# remove and re-create a new table under that table name
db.replaceTable("ExNamespace", "ExTable", aapl100_wDlr)
nowSeeThatTable = db.i("ExNamespace", "ExTable").where()
# totally remove that table
# db.removeTable("ExNamespace", "ExTable")
# note the below now fails because the table does not exist
# thisWillFail = db.i("ExNamespace", "ExTable").where()

### Do Cum-Sum and Rolling Average
from deephaven import *

def hasOthers(array, target):
    """Return True if `array` contains any element different from `target`.

    Used below to detect when a trailing window spans more than one Ticker.
    """
    for x in array:
        if x != target:
            return True
    return False

makeArrays = staticSource2.updateView(
    "TickerArray10 = Ticker_.subArray(i-10, i-1)",
    "PriceArray10 = Close_.subArray(i-10, i-1)",
    "VolumeArray10 = Volume_.subArray(i-10, i-1)")

CumAndRoll10 = makeArrays.update(
    "ArrayHasOtherTickers = hasOthers.call(TickerArray10.toArray(), Ticker)",
    "CumSum10 = ArrayHasOtherTickers = false ? (int)sum(VolumeArray10) : NULL_INT",
    "RollAvg10 = ArrayHasOtherTickers = false ? avg(PriceArray10) : NULL_DOUBLE")

### Another Example of Creating a Rolling Sum
def rollingSum(rows, values):
    """Calculate a rolling sum from a java int array.

    :param rows: size of the rolling sum (i.e. number of rows to sum over)
    :param values: java int array of the values to be summed
    :return: java int array of the rolling sums, one per input row
    """
    calculations = jpy.array('int', values.size())  # result array
    sum_ = 0  # our running sum (avoid shadowing builtin `sum`)
    for i in range(values.size()):
        sum_ += values.get(i)
        if i >= rows:
            # subtract from the rolling sum when the window is full
            sum_ -= values.get(i - rows)
        calculations[i] = sum_
    return calculations

trades = db.t("LearnDeephaven", "StockTrades").where("Date=`2017-08-25`")\
    .view("Sym", "Size")\
    .headBy(100, "Sym")
symGroups = trades.by("Sym")
sums = symGroups.updateView("RollingSum=(int[]) rollingSum.call(20, Size)")
ungrouped = sums.ungroup()  # renamed from `ungroup` to avoid confusion with the method

### Setting up an EMA
# reference requisite libraries
from deephaven import *
from deephaven.DBTimeUtils import *
ByEmaSimple = jpy.get_type("com.illumon.numerics.movingaverages.ByEmaSimple")
BadDataBehavior = jpy.get_type("com.illumon.numerics.movingaverages.ByEma$BadDataBehavior")
MaMode = jpy.get_type("com.illumon.numerics.movingaverages.AbstractMa$Mode")
TimeUnit = jpy.get_type("java.util.concurrent.TimeUnit")

# configure EMA parameters
# note TIME below
ema_price_10min = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                              MaMode.TIME, 10.0, TimeUnit.MINUTES)
ema_price_60min = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                              MaMode.TIME, 60.0, TimeUnit.MINUTES)
# see TICK below
ema_price_100ticks = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                                 MaMode.TICK, 100, None)

# calculate EMA within update (or updateView, select, view) method
withEmas = staticSource1.update(
    "EmaMin10=ema_price_10min.update(Timestamp, Last, USym)",
    "EmaMin60=ema_price_60min.update(Timestamp, Last, USym)",
    "EmaTick100=ema_price_100ticks.update(Timestamp, Last, USym)")
# notice the EMA is calculated by USym
# TradingHoursEma also exists, but is beyond the scope of this document.
# That can be tied to a business calendar.
# Datetime method diffBusinessNanos() can also be helpful for that.

### Use CumulativeUtil
# The CumulativeUtil class provides methods to simplify rolling and cumulative
# aggregations of double values
cu = jpy.get_type("com.illumon.iris.db.v2.utils.CumulativeUtil")
t = db.t("LearnDeephaven", "StockTrades")\
    .where("Date=`2017-08-25`")\
    .update("Volume=Last*Size")
t2 = t.sort("Timestamp")
CumulativeMarketSum = cu.cumSum(t2, "CumulativeVolume", "Volume")
CumulativeSumBySymbol = cu.cumSum(t, "Sym", "CumulativeVolume", "Volume")
CumulativeAvgBySymbol = cu.cumSum(t, "Sym", "CumulativeAvgVolume", "Volume")\
    .update("CumulativeAvgVolume=CumulativeAvgVolume/(ii+1)")

# When using these methods with intraday (ticking) tables, it must be ensured that
# appends of new rows are the only changes that happen to the source tables. This
# can be done using the assertAppendOnly method. If changes other than appends occur
# in the underlying table, an exception will be thrown, ending the query. This
# ensures that the results returned by CumulativeUtil methods are accurate.
ta = jpy.get_type("com.illumon.iris.db.tables.verify.TableAssertions")
tickingTable = db.i("DbInternal", "ProcessEventLog").where("Date=currentDateNy()")
appendOnlyTickingTable = ta.assertAppendOnly(tickingTable)
# CumulativeUtil methods would then be applied to appendOnlyTickingTable.
# USE NUMPY AND PANDAS
from deephaven import *
from deephaven import npy
from deephaven import tableToDataFrame
import numpy as np
import pandas as pd

# note these conversions are less immediately available than DH tables:
# - constrained by memory available
# - no updates -- ticking sources are snapshotted
pandasDF = tableToDataFrame(staticSource1.where("Date = `2017-08-25`"))

# do Pandas stuff to prove it is a Pandas DataFrame
print(type(pandasDF))
print(pandasDF.dtypes)
print(pandasDF.describe())

# if all the columns can be of the same data type,
# and if the data set is very large,
# it may be faster to make a Pandas DataFrame via NumPy
justNumbers = staticSource1.view("Last", "Size", "ExchangeId")
pandasDF_3Cols = pd.DataFrame(
    data=npy.numpy_slice(justNumbers, 0, justNumbers.size(), dtype=np.float32),
    columns=justNumbers.getDefinition().getColumnNamesArray()
)
print(type(pandasDF_3Cols))
print(pandasDF_3Cols.dtypes)

# get an example table to work with
aaplTrades = staticSource1.where("Date = `2017-08-25`", "USym = `AAPL`")

# IMPORTANT: Conversions from DH Tables to NumPy arrays are to
# one-dimensional arrays only!
# make numPy arrays of single columns npArray_Price = columnToNumpyArray(aaplTrades, "Last") npArray_TradeSize = columnToNumpyArray(aaplTrades, "Size") # do NumPy stuff print(type(npArray_Price)) print(npArray_Price.size) print(npArray_Price[10:20]) print(npArray_Price.max()) print(npArray_TradeSize.sum()) print(np.unique(npArray_TradeSize)) # multiply arrays npArray_Dollars = np.multiply(npArray_Price, npArray_TradeSize) # make a 3xN NumPy array np3Cols = np.stack((npArray_Price, npArray_TradeSize, npArray_Dollars), axis = 1) print(np3Cols.shape) # make a Pandas DF from the 3XN NumPy Array pandasDF_fromNumPy = pd.DataFrame(np3Cols, columns = ['Price', 'Shares', 'Dollars']) print(pandasDF_fromNumPy.aggregate(['sum', 'min', 'max', 'average', 'median'])) # turn Pandas DF into Deephaven Table dhTable = dataFrameToTable(pandasDF) # FORMAT TABLES ## Datetime Formatting timeFormatting = staticSource1.view("Time1 = Timestamp", "Time2 = Timestamp", "Time3 = Timestamp", "Time4 = Timestamp", "Time5 = Timestamp", "Time6 = Timestamp")\ .formatColumns("Time1=Date(`yyyy_MM_dd'T'HH-mm-ss.S z`)", "Time2 = Date(`yyyy-MM-dd'T'HH:mm t`)", "Time3 = Date(`HH:mm:ss`)", "Time4 = Date(`HH:mm:ss.SSSSSSSSS`)", "Time5 = Date(`EEE dd MMM yy HH:MM:ss`)", "Time6 = Date(`yyyy-MM-dd`)")\ .updateView("Time7_string = formatDate(Time6, TZ_NY)") ## Number Formatting numberFormatting = staticSource1.view("Last",\ "BigNum1 = Last * 1000", "BigNum2 = Last * 1000",\ "BigNum3 = Last * 1000", "Price1 = Last", "Price2 = Last",\ "Price3 = Last",\ "SmallNum1 = Last / 1000", "SmallNum2 = Last / 1000",\ "SmallNum3 = Last / 1000", "TinyNum = Last / 1_000_000_000")\ .formatColumns("BigNum1 = Decimal(`###,#00`)",\ "BigNum2 = Decimal(`##0.00`)",\ "BigNum3 = Decimal(`0.####E0`)",\ "Price1 = Decimal(`###,###.##`)",\ "Price2 = Decimal(`$###,##0.00`)",\ "SmallNum1 = Decimal(`##0.000000`)",\ "SmallNum2 = Decimal(`##0.00%`)",\ "SmallNum3 = Decimal(`##0%`)",\ "TinyNum = Decimal(`0.00E0`)") # run these next three tables 
# (these next three tables run together)
# create table for the example
numTable = staticSource1.view("Last",
    "Random1 = Last * (new Random().nextDouble())",
    "Random2 = Last * (new Random().nextDouble())",
    "PriceLong = (long)Last", "PriceInt = (int)Last",
    "BigNumFloat = (float)(Last * 1000)")

# efficient syntax to round many float and double columns across a table
from deephaven import *
roundColumns = ttools.roundDecimalColumns(numTable)
roundMostColums = ttools.roundDecimalColumnsExcept(numTable, "Last")

## Color Formatting
justColors = staticSource1\
    .formatColumns("Timestamp = `#90F060`", "SecurityType = LIGHTBLUE",
        "Exchange = colorRGB(247,204,0)",
        "USym = colorRGB(247,204,204)",
        "Sym = bgfg(colorRGB(57,43,128),colorRGB(243,247,122))",
        "Last = bgfga(MEDIUMVIOLETRED)",
        "Size = colorHSL(0, 24, 36)")

# need this for the last part of conditionalColors script
from deephaven import *
colorMe = DistinctFormatter()

conditionalColors = staticSource1.where("Date = `2017-08-25`")\
    .updateView("RowMod10 = i % 10")\
    .formatColumnWhere("Source", "SaleCondition = `@FTI`", "DEEP_RED")\
    .formatColumnWhere("USym", "Last > Last_[i-1]", "LIMEGREEN")\
    .formatRowWhere("ExchangeId % 10 = 0", "colorHSL(260, 94, 52)")\
    .formatColumns("Size = (Size % 5 !=0) ? ORANGE : BLACK",
        "SecurityType = (Exchange = `Nasdaq` && SaleCondition = `@TI`) ? colorRGB(253, 31, 203) : colorRGB(171, 254, 42)",
        "RowMod10 = heatmap(RowMod10, 1.0, 10.0, BRIGHT_GREEN, BRIGHT_RED)",
        "Exchange = colorMe.getColor(Exchange)")
# list of canned colors found here:
# https://docs.deephaven.io/latest/Content/User/misc/colorsAll.htm?Highlight=color

# one can formatColumns() for numbers and colors together
numberAndColor = staticSource1.formatColumns("Last = Decimal(`##0.00`)",
    "Last = (Last > Last_[i-1]) ? FUCHSIA : GREY")

# PLOT PROGRAMMATICALLY
# Substantial documentation about plotting exists
# https://docs.deephaven.io/latest/Content/writeQueries/plot/overview.htm
# The below intends to show the basics
# in particular, everything about styling and labeling is omitted from the below
# https://docs.deephaven.io/latest/Content/writeQueries/plot/components/index.htm
# https://docs.deephaven.io/latest/Content/writeQueries/plot/visual.htm#Visual_Formatting
from deephaven import *
from deephaven import Calendars
from deephaven import Plot

# create some tables to use in the plot examples
cal = Calendars.calendar("USNYSE")
fourUSym = staticSource1.where("USym in `GOOG`,`AAPL`,`CSCO`, `MSFT`",
        "cal.isBusinessTime(ExchangeTimestamp)", "SaleCondition!=`@W`",
        "inRange(Size, 100, 10_000)")\
    .sort("Date", "USym", "ExchangeTimestamp")
fourUSymOneDay = fourUSym.where("Date = `2017-08-25`")
fourUSymBest = fourUSymOneDay\
    .naturalJoin(fourUSymOneDay.firstBy("USym"), "USym", "Open = Last")\
    .updateView("PercChange = Last / Open - 1")\
    .aj(fourUSymOneDay.where("USym = `MSFT`"),
        "ExchangeTimestamp", "MsftPrice = Last")
justAaplMultiday = fourUSym.where("USym = `AAPL`")
justAaplBest = fourUSymBest.where("USym = `AAPL`")
dlrVolume = staticSource2.where("Ticker in `AAPL`, `CSCO`, `GOOG`, `MSFT`")\
    .update("DlrVolume = Close * Volume",
        "Month = formatDate(EODTimestamp, TZ_NY).substring(0,7)")

# Time series plots
timeSeriesPlot = Plot.plot("", justAaplBest, "ExchangeTimestamp", "Last").show()
# note .show()

timeSeries2axes = Plot.plot("MSFT Series", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("GOOG Series", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "Last").show()
# note the first parameter .plot("this-one"... could not be ""
# see .twinX()
timeSeries3axes = Plot.plot("MSFT Series", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("GOOG Series", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("CSCO Series", fourUSymBest.where("USym = `CSCO`"),
        "ExchangeTimestamp", "Last").show()
# can do more than 3

# Multiple series on the same axis
timeSeriesSameAxis = Plot.plot("MSFT %-Chge", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "PercChange")\
    .plot("GOOG %-Chge", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "PercChange").show()
# no .twinX() in this case

# Bar chart
barChart = Plot.plot("", dlrVolume.where("Ticker = `AAPL`"),
        "EODTimestamp", "DlrVolume")\
    .plotStyle("Bar").show()

# Plot-by-some key
plotBySeries = Plot.plotBy("Percent Change", fourUSymBest, "ExchangeTimestamp",
    "PercChange", "USym").show()
# see that the last overload "USym", in this case, is the key
# in this case this will create 4 series for the respective 4 USyms

# Stacked Area
stackedArea = Plot.plotBy("Dlr Volume", dlrVolume, "EODTimestamp",
        "DlrVolume", "Ticker")\
    .plotStyle("stacked_area").show()

# Area graph
viewAsArea = timeSeriesSameAxis.plotStyle("Area").show()
# see that it references the plotting object of the previous script

# Business time
uglyRawMultiday = Plot.plot("", justAaplMultiday, "ExchangeTimestamp", "Last")\
    .show()
# use xBusinessTime() to get rid of overnight and weekend time
prettyWithBizTimeConstraint = Plot.plot("", justAaplMultiday, "ExchangeTimestamp",
        "Last")\
    .xBusinessTime().show()
# this relies on the prevailing biz-time calendar that is set

# Scatter
scatterPlot = Plot.plot("", justAaplBest.where("i % 10 = 0"), "MsftPrice", "Last")\
    .plotStyle("scatter").show()
# Note the manual down-sampling above
# Without it, for large tables (this one is 87k records) the UI will
# make you choose whether you really want to graph so many points
# Deephaven smart-auto-downsampling is only for line plots
# Histogram
plotHistogram = Plot.histPlot("", justAaplBest, "Last", 20).show()
catHistogramBySym = Plot.catHistPlot("", fourUSymBest, "USym").show()
# de facto count of rows

# Open-high-low-close
plotOHLC = Plot.ohlcPlot("", staticSource2.where("Ticker = `AAPL`"), "EODTimestamp",
    "Open", "High", "Low", "Close").show()
# syntax exists to profile data into time bins with OHLC

# Error Bar Plot
errorBarTable = justAaplBest\
    .updateView("TimeBin=upperBin(ExchangeTimestamp, 15 * MINUTE)")\
    .by(caf.AggCombo(caf.AggAvg("AvgPrice = Last"),
        caf.AggStd("StdPrice = Last")), "TimeBin")\
    .updateView("Low = AvgPrice - StdPrice", "Hi = AvgPrice + StdPrice")
errorBarPlot = Plot.errorBarY("", errorBarTable, "TimeBin",
    "AvgPrice", "Low", "Hi").show()

## One-click plotting
# One-click plotting allows a script writer to designate a key that users
# can enter into a one-click or linker experience, thereby toggling
# the plot to filter on the fly
# Users will need to add an Input Filter or configure the Linker to their UI
# to manipulate the One-Click plots.
# See https://docs.deephaven.io/latest/Content/web/controls.htm
oneClickEx = Plot.oneClick(fourUSymBest, "USym")
# this maps the one-click experience to the key of USym (in this case)
# there is no table visible from this
# in DH-speak it saves the OneClick SelectableDataSet to the variable
oneClickPlot = Plot.plot("USym", oneClickEx, "ExchangeTimestamp", "Last").show()

# Another one-click example
datasetHolder2 = Plot.oneClick(dlrVolume.view("Month", "EODTimestamp", "DlrVolume")
    .sumBy("Month", "EODTimestamp").sort("EODTimestamp"), "Month")
# FIX: original had a stray trailing line-continuation after the statement above
# and referenced an undefined 'datasetHolder3' below; use datasetHolder2
oneClickPlot2 = Plot.plot("Dlr Volume", datasetHolder2, "EODTimestamp", "DlrVolume")\
    .plotStyle("Area").show()
# change the Input Filter in the UI to 'Month'
# then, for example, type in that box 2017-03