# ACCESS DATA

## Sourcing tables
# NEEDED: The named tables in these two lines are used throughout
# the pages of examples below.
# Note: the table name left of the "=" sign is a variable named whatever you want.
allAvailable = db.getCatalog()  # full catalog of namespaces and tables
staticSource1 = db.t("LearnDeephaven", "StockTrades")  # historical data
staticSource2 = db.t("LearnDeephaven", "EODTrades")  # historical EOD data
tickingSource = db.i("DbInternal", "ProcessEventLog")  # intraday data -- a real-time
# updating table! note: db.i (intraday) instead of db.t (historical)

from deephaven import *  # accesses a necessary library for the below
snapshotEx = db.i("DbInternal", "ProcessEventLog", False)  # intraday data w/o ticking

## Merging tables
copy1 = staticSource1.where()  # 3.16 mm records
copy2 = staticSource1.where()  # same 3.16 mm records in a table of a different name
mergeBothCopies = ttools.merge(copy1, copy2)
# to merge tables, the schema must be identical:
# same column names, same column data types

# merging updating (ticking) tables follows the same standard
copyUpdating1 = tickingSource.where()
copyUpdating2 = tickingSource.where()
# renamed from mergeBothCopies so the static example above is not silently overwritten
mergeBothUpdatingCopies = ttools.merge(copyUpdating1, copyUpdating2)
# same schema for each of the tables to be merged

# one can merge static and ticking tables
staticSource1v2 = staticSource1.where().view("Date", "Timestamp", "SourceRowNum = i")
tickingSourcev2 = tickingSource.where().view("Date", "Timestamp", "SourceRowNum = i")
mergeStaticAndUpdating = ttools.merge(staticSource1v2, tickingSourcev2)
# again, same schema for each of the tables to be merged
# in the UI right-click on the table heading, then
# hit Reverse/Before-sorting from the menu -- updates at top

## Viewing metadata of a table
seeMetadata = staticSource1.getMeta()  # column names, data types, partitions, groups

## Printing information
print(db.getNamespaces())  # just the namespaces, not also the tables
print(db.getIntradayPartitions("DbInternal", "ProcessEventLog"))  # see partitions

from deephaven.Calendars import calendarNames
print(calendarNames())  # list of business calendars shipped with Deephaven

help('deephaven')  # describes the packages available in the main deephaven module

# FILTER
# VERY IMPORTANT: Backticks (`) are used for strings and single quotes (') are used for times.
# VERY IMPORTANT: Most queries benefit by starting with filters.
# Less data generally means better performance.
# For SQL developers: In Deephaven, joins are not a primary operation for filtering.
# Use where(), whereIn(), and whereNotIn().

## Date & Time examples
todaysData1 = tickingSource.where("Date = currentDay()")  # Date for this is a String
todaysData2 = tickingSource.where("Date = currentDateNy()")  # popular for NY
todaysData3 = tickingSource.where("Date = currentDate(TZ_SYD)")  # Sydney timezone

# VERY IMPORTANT: Filtering first by partitioning values (which is most often the set
# of Dates) is both a best practice, and necessary for some downstream use cases.
singleStringDate = staticSource1.where("Date = `2017-08-23`")  # HEAVILY USED!
lessThanDate = staticSource1.where("Date < `2017-08-23`")  # use >=, etc. as well
oneMonthString = staticSource1.where("Date.startsWith(`2017-08`)")
singleDbDate = staticSource2.where("formatDate(EODTimestamp, TZ_NY) = `2017-03-01`")
afterDbDatetime = staticSource1.where("Timestamp > '2017-08-25T10:30:00 NY'")
justBizTime = staticSource1.where("isBusinessTime(Timestamp)")  # HEAVILY USED!
# More Date & Time filters
justAugustDatetime = staticSource2.where("monthOfYear(EODTimestamp, TZ_NY) = 8")
just10amHour = staticSource1.where("hourOfDay(Timestamp, TZ_NY) = 10")
justTues = staticSource2.where("dayOfWeek(EODTimestamp).getValue() = 2")  # Tuesdays

## String examples
oneStringMatch = staticSource1.where("USym = `AAPL`")  # match filter
stringSetMatch = staticSource1.where("USym in `AAPL`, `MSFT`, `GOOG`")
caseInsensitive = staticSource1.where("USym icase in `aapl`, `msft`, `goog`")
notInExample = staticSource1.where("USym not in `AAPL`, `MSFT`, `GOOG`")  # see "not"
containsExample = staticSource1.where("USym.contains(`I`)")
notContainsExample = staticSource1.where("!USym.contains(`I`)")
startsWithExample = staticSource1.where("USym.startsWith(`A`)")
endsWithExample = staticSource1.where("USym.endsWith(`M`)")

## Number examples
equalsExample = staticSource2.where("round(Open) = 44")
lessThanExample = staticSource2.where("High < 8.42")
someManipulation = staticSource2.where("(Close - Open) / Open > 0.05")
modExample1 = staticSource2.where("i % 10 = 0")  # every 10th row
modExample2 = staticSource2.where("Ticker.length() % 2 = 0")  # even char-count Tickers

## Multiple filters
conjunctiveComma = staticSource1.where("Date = `2017-08-23`", "USym = `AAPL`")  # HEAVILY USED!
conjunctiveAmpA = staticSource1.where("Date = `2017-08-23` && USym = `AAPL`")
# note: a stray line-continuation backslash inside this query string has been removed
disjunctiveSameCol = staticSource1.where("Date = `2017-08-23` || Date = `2017-08-25`")
# thisDate or thatDate
disjunctiveDiffCol = staticSource1.where("Date = `2017-08-23` || USym = `AAPL`")
# thisDate or thatUSym
rangeLameWay = staticSource1.where("Date >= `2017-08-21`", "Date <= `2017-08-23`")
inRangeBest = staticSource1.where("inRange(Date, `2017-08-21`, `2017-08-23`)")  # HEAVILY USED!
inRangeBestString = staticSource1.where("inRange(USym.substring(0,1), `A`, `D`)")
# starts with letters A - D

## WhereIn / WhereNotIn
# run these two queries together
usymSetDriver = staticSource1.renameColumns("Ticker = USym")
whereInExample = staticSource2.whereIn(usymSetDriver, "Ticker")  # HEAVILY USED!
# filters staticSource2 USyms to be the same as
# the set of USyms in staticSource1, i.e.
# the same as in usymSetDriver
# this can be dynamic to accommodate changing usymSetDriver data

# renamed so the example above is not overwritten; also removed the stray
# backslash-before-comment that made this line a SyntaxError
whereInDiffColName = staticSource2.whereIn(staticSource1, "Ticker = USym")
# Ticker in staticSource2 within list of USym of staticSource1
whereNotInExample = staticSource2.whereNotIn(staticSource1, "Ticker = USym")
# see the "not"

## Nulls and NaNs
nullExample = staticSource1.where("isNull(ExchangeId)")  # all data types supported
notNullExample = staticSource1.where("!isNull(ExchangeId)")
nanExample = staticSource1.where("isNaN(Last)")
notNanExample = staticSource1.where("!isNaN(Last)")
neitherNanNull = staticSource1.where("isNormal(Last)")  # normal = not null nor NaN
eitherNanNull = staticSource1.where("!isNormal(Last)")  # not normal

## Head and Tail
first10k = staticSource2.head(10_000)
last10k = staticSource2.tail(10_000)  # note: this would deliver a changing set
# if run against intraday (updating) data
first15Perc = staticSource2.headPct(0.15)  # the first 15% of rows
last15Perc = staticSource2.tailPct(0.15)  # the last 15% of rows
first20byKey = staticSource1.headBy(20, "USym")  # first 20 rows for each USym
last20byKey = staticSource1.tailBy(20, "USym")  # last 20 rows for each USym

# SORT
oneSortTime = staticSource1.sort("Timestamp")  # works for all data types, actually
oneSortNumber = staticSource1.sort("Last")  # sorting a number
oneSortString = staticSource1.sort("Exchange")  # sorting a string
sortDescExample = staticSource1.sortDescending("Timestamp")  # highest to lowest
multiSortClassic = staticSource1.sort("Exchange", "Timestamp")  # Exchange then Time

# run these below together
SortPair = jpy.get_type("com.illumon.iris.db.tables.SortPair")
sortPairs = staticSource1.sort(SortPair.ascending("Exchange"),
                               SortPair.descending("Timestamp"))
# Exchange ascending then Timestamp descending

reverseTable = staticSource1.reverse()  # HEAVILY USED! Very cheap to support GUIs
reverseUpdating = tickingSource.reverse()  # updates then come to the top of table

# SELECT AND CREATE NEW COLUMNS
# VERY IMPORTANT: Use select() and update() when (i) expensive to calculate or
# (ii) accessed frequently.

## Version 1: Choose and add new columns -- Calculate and write to memory
selectColumns = staticSource2.select("EODTimestamp", "Ticker", "Open", "Close")
# constrain to only those 4 columns, write to memory
selectAddCol = staticSource2.select("EODTimestamp", "Ticker", "Open", "Close",
                                    "Change = Close - Open")
# constrain and add a new calculated column
selectAndUpdateCol = staticSource2.select("EODTimestamp", "Ticker", "Open",
                                          "Close").update("Change = Close - Open")
# add a new calculated column - logically equivalent to previous example

# VERY IMPORTANT: Use view() and updateView() when (i) formula is quick or
# (ii) only a portion of the data is used at a time. Minimizes RAM used.

## Version 2: Choose and add new columns -- Reference a formula and calc on the fly
viewColumns = staticSource2.view("EODTimestamp", "Ticker", "Open", "Close")
# similar to select(), though uses on-demand formula
# NOTE(review): the comment below describes a view-style column set, but the call is
# updateView (which keeps all other columns) -- confirm whether view() was intended
viewAddCol = staticSource2.updateView("EODTimestamp", "Ticker", "Open", "Close",
                                      "Change = Close - Open")
# view set and add a column, though with an on-demand formula
viewAndUpdateViewCol = staticSource2.view("EODTimestamp", "Ticker", "Open",
                                          "Close").updateView("Change = Close - Open")
# logically equivalent to previous example

# VERY IMPORTANT: Use lazyUpdate() when there are a small-ish number of unique values;
# on-demand formula results are stored in cache and re-used.
## Version 3: Add new columns -- Reference a formula and calc on the fly
lazyUpdateEx = staticSource2.lazyUpdate("EODTimestamp", "Ticker", "Open", "Close",
                                        "Change = Close - Open")
# similar to update(), though uses on-demand formula

## Things one can do within updateView (or equivalently update, select, view) methods:

### GET THE ROW NUMBER
getTheRowNumber = staticSource2.updateView("RowNum = i")

### DO SOME MATH
simpleMath = staticSource2.updateView(
    "RandomNumber = Math.random()",
    "RandomInt100 = new Random().nextInt(100)",
    "Arithmetic = Close * Volume",
    "SigNum = signum(RandomNumber - 0.5)",
    "Signed = SigNum * Arithmetic",
    "AbsDlrVolume = abs(Signed)",
    "Power = Math.pow(i, 2)",
    "Exp = Close * 1E2",
    "Log = Close * log(2.0)",
    "Round = round(Close)",
    "Floor = floor(Close)",
    "Mod = RandomInt100 % 5",
    "CastInt = (int)AbsDlrVolume",
    "CastLong = (long)AbsDlrVolume")

### HANDLE ARRAYS
arrayExamples = staticSource2.updateView(
    "RowNumber = i",
    "PrevRowReference = Close_[i-1]",
    "MultipleRowRef = Close_[i-2] - Close_[i-1]",
    "SubArray = Close_.subArray(i-2, i+1)",
    "SumSubArray = sum(Close_.subArray(i-2, i+1))",
    "ArraySize = SubArray.size()",
    "ArrayMin = min(SubArray)",
    "ArrayMax = max(SubArray)",
    "ArrayMedian = median(SubArray)",
    "ArrayAvg = avg(SubArray)",
    "ArrayStd = std(SubArray)",
    "ArrayVar = var(SubArray)",
    "ArrayLast = last(SubArray)",
    "ArrayIndex = SubArray[1]",
    "InArray = in(45.71, SubArray)")

### CALCULATE PERCENTILE IN A SERIES
viewAndSortPrices = staticSource2.view("Date = formatDate(EODTimestamp, TZ_NY)",
                                       "Ticker",
                                       "ClosingPrice = Close")\
    .sort("Ticker", "ClosingPrice")
closingPriceArraysByTicker = viewAndSortPrices.by("Ticker")
# join the two tables above using natural join
priceAndArray = viewAndSortPrices\
    .naturalJoin(closingPriceArraysByTicker, "Ticker", "PriceArray = ClosingPrice")\
    .sort("Ticker", "Date")
# use binSearch to drive the percentage calculation
percRank = priceAndArray.updateView(
    "ArraySize = PriceArray.size()",
    "BinRank = binSearchIndex(PriceArray, ClosingPrice, BS_LOWEST)",
    "PercRank = BinRank / ArraySize",
    "Perc25 = percentile(PriceArray, 0.25)",
    "Median = median(PriceArray)",
    "Perc75 = percentile(PriceArray, 0.75)")

### MANIPULATE TIME AND CALENDARS
timeStuff = staticSource2.updateView(
    "CurrentTime = currentTime()",
    "CurrentDateDefault = currentDay()",
    "CurrentDateNy = currentDateNy()",
    "CurrentDateLon = currentDate(TZ_LON)",
    "LastBizDateNy = lastBusinessDateNy()",
    "IsAfter = CurrentTime > EODTimestamp",
    "IsBizDay = isBusinessDay(CurrentDateLon)",
    "StringDT = format(EODTimestamp, TZ_NY)",
    "StringDate = formatDate(EODTimestamp, TZ_NY)",
    "StringToTime = convertDateTime(StringDate + `T12:00 NY`)",
    "AddTime = EODTimestamp + '05:11:04.332'",
    "PlusHour = EODTimestamp + HOUR",
    "LessTenMins = EODTimestamp - 10 * MINUTE",
    "DiffYear = diffYear(EODTimestamp, currentTime())",
    "DiffDay = diffDay(EODTimestamp, CurrentTime)",
    "DiffNanos = PlusHour - EODTimestamp",
    "DayWeek = dayOfWeek(EODTimestamp, TZ_NY)",
    "HourDay = hourOfDay(EODTimestamp, TZ_NY)",
    "DateAtMidnight = dateAtMidnight(EODTimestamp, TZ_NY)",
    "IsBizDayString = isBusinessDay(StringDate)",
    "IsBizDayDatetime = isBusinessDay(EODTimestamp)",
    # fixed: originally duplicated isBusinessDay(EODTimestamp) from the line above,
    # contradicting this column's name
    "IsBizTime = isBusinessTime(EODTimestamp)",
    "FracBizDayDone = fractionOfBusinessDayComplete(currentTime())",
    "FracBizDayOpen = fractionOfBusinessDayRemaining(currentTime())",
    "NextNonBizDay = nextNonBusinessDay()",
    "NextBizDayCurrent = nextBusinessDay()",
    "NextBizDayString = nextBusinessDay(StringDate)",
    "NextBizDayDatetime = nextBusinessDay(EODTimestamp)",
    "PlusFiveBizDayCurrent = nextBusinessDay(5)",
    "PlusFBizDayString = nextBusinessDay(StringDate, 5)",
    "PlusFBizDayDatetime = nextBusinessDay(EODTimestamp, 5)",
    "PrevBizDayCurrent = previousBusinessDay()",
    "PrevBizDayString = previousBusinessDay(StringDate)",
    "PrevBizDayDatetime = previousBusinessDay(EODTimestamp)",
    "MinusFiveBizDayCurrent = previousBusinessDay(5)",
    "MinusFiveBizDayString = previousBusinessDay(StringDate, 5)",
    "MinusFiveBizDayDatetime = previousBusinessDay(EODTimestamp, 5)",
    "BizDayArray = businessDaysInRange(EODTimestamp, currentTime())",
    "NonBizDayArray = nonBusinessDaysInRange(EODTimestamp, currentTime())",
    "DiffBizDaysCount = numberOfBusinessDays(EODTimestamp, currentTime())",
    "DiffBizDaysExact = diffBusinessDay(EODTimestamp, currentTime())",
    "DiffBizDaysString = numberOfBusinessDays(MinusFiveBizDayString, StringDate)",
    "StandardBizDayNanos = standardBusinessDayLengthNanos()",
    "DiffBizSecs = diffBusinessNanos(CurrentTime, CurrentTime + 5 * DAY) / 1E9",
    "LastBizOfMonth = isLastBusinessDayOfMonth(StringDate)",
    "LastBizOfWeek = isLastBusinessDayOfWeek(currentTime())")

### BIN DATA
bins = staticSource1.updateView("PriceBin = upperBin(Last, 20)",
                                "SizeBin = lowerBin(Size, 100)",
                                "TimeBin = upperBin(ExchangeTimestamp, '00:15:00')")
aggBin = bins.view("TimeBin", "Dollars = Last * Size").sumBy("TimeBin")
# binning is a great pre-step for aggregating to support
# the down-sampling or other profiling of data

### MANIPULATE STRINGS
stringStuff = staticSource2.view(
    "StringDate = formatDate(EODTimestamp, TZ_NY)",
    "Ticker", "Close",
    "NewString = `new_string_example_`",
    "ConcatV1 = NewString + Ticker",
    "ConcatV2 = NewString + `Note_backticks!!`",
    "ConcatV3 = NewString.concat(Ticker)",
    "ConcatV4 = NewString.concat(`Note_backticks!!`)",
    "StartBool = Ticker.startsWith(`M`)",
    "NoEndBool = !Ticker.endsWith(`OG`)",
    "ContainedBool = Ticker.contains(`AA`)",
    "NoContainBool = !Ticker.contains(`AA`)",
    "FirstChar = Ticker.substring(0,1)",
    "LengthString = Ticker.length()",
    "CharIndexPos = ConcatV1.charAt(19)",
    "SubstringEx = ConcatV1.substring(11,20)",
    "FindIt = NewString.indexOf(`_`)",
    "FindItMiddle = NewString.indexOf(`_`, FindIt + 1)",
    "FindLastOf = NewString.lastIndexOf(`_`)",
    "SplitToArrays = NewString.split(`_`)",
    "SplitWithMax = NewString.split(`_`, 2)",
    "SplitIndexPos = NewString.split(`_`)[1]",
    "LowerCase = Ticker.toLowerCase()",
    "UpperCase = NewString.toUpperCase()",
    "DoubleToStringv1 = Close + ``",
    "DoubleToStringv2 = String.valueOf(Close)",
    "DoubleToStringv3 = Double.toString(Close)",
    "StringToDoublev1 = Double.valueOf(DoubleToStringv1)")

### USE TERNARIES; IF-THENS
ifThenexample = staticSource2.updateView(
    "SimpleTernary = Close < 100 ? `smaller` : `bigger`",
    "TwoLayers = Close <= 10 ? `small` : Close < 100 ? `medium` : `large`",
    "Conj = Close < 100 && Ticker.contains(`A`) ? `special` : `boring`",
    "Disj = Close < 100 || Ticker.contains(`A`) ? `fave` : `other`",
    "PrevClose = Ticker_[i-1] = Ticker ? Close_[i-1] : NULL_DOUBLE")

### CREATE AND USE A CUSTOM FUNCTION
def mult(a, b):
    """Return the product a * b (demo of calling a Python function from a query)."""
    return a * b

pyFuncExample = staticSource2.update("M = (double)mult.call(Close, Volume)")

# MANIPULATE COLUMNS
uniqueValues = staticSource2.selectDistinct("Ticker")  # show unique set
# works on all data types - careful with doubles, longs
uniqueValuesCombo = staticSource2.selectDistinct("EODTimestamp", "Ticker")
# unique combinations of EODTimestamp with Ticker
renameStuff = staticSource2.renameColumns("USym = Ticker", "ClosingPrice = Close")
dropColumn = staticSource2.dropColumns("Volume")  # same method for one or many
dropColumns = staticSource2.dropColumns("ImportDate", "Open", "Volume")
putColsAtStart = staticSource2.moveUpColumns("Ticker", "Volume")  # makes 1st col(s)
putColsWherever = staticSource2.moveColumns(1, "Ticker", "Volume")
# makes Ticker the 2nd and Volume the 3rd column
colDescription = staticSource1.withColumnDescription("USym", "Underlying Symbol")
# when user hovers on column header the description is visible

# GROUP AND AGGREGATE

## Simple Grouping
groupToArrays1 = staticSource1.by("USym")
# one row per key (i.e., USym), all other columns are arrays
multipleKeys = staticSource1.by("USym", "Exchange")
# one row for each key-combination (i.e. USym-Exchange pairs)

## Un-Grouping
# Run these lines together
aggByKey = staticSource1.by("Date")
# one row per Date, other fields are arrays from staticSource1
ungroupThatOutput = aggByKey.ungroup()  # no arguments usually
# each array value becomes its own row
# in this case turns aggByKey back into staticSource1

## Aggregations
# IMPORTANT: Any column not in the parentheses of the whateverBy("Col1", "Col2")
# statement needs to be an ok type for that aggregation method -- i.e. sums need
# all non-key columns to be numbers.
firstByKey = staticSource1.firstBy("Date")
firstByTwoKeys = staticSource1.firstBy("Date", "USym")  # all below work with multi
countOfEntireTable = staticSource1.countBy("USym")  # single arg returns tot count
countOfGroup = staticSource1.countBy("N", "USym")  # N is the count column's name
firstOfGroup = staticSource1.firstBy("USym")
lastOfGroup = staticSource1.lastBy("USym")
sumOfGroup = staticSource1.view("USym", "Last").sumBy("USym")
# non-key field must be numerical
avgOfGroup = staticSource1.view("USym", "Last").avgBy("USym")
stdOfGroup = staticSource1.view("USym", "Last").stdBy("USym")
varOfGroup = staticSource1.view("USym", "Last").varBy("USym")
medianOfGroup = staticSource1.view("USym", "Last").medianBy("USym")
minOfGroup = staticSource1.view("USym", "Last").minBy("USym")
maxOfGroup = staticSource1.view("USym", "Last").maxBy("USym")

## Combined Aggregations
# combine aggregations in a single method (using the same key-grouping)
from deephaven import *
combinationAgg = staticSource1.updateView("Dollars = Last * Size")\
    .by(caf.AggCombo(
        caf.AggLast("LastPrice = Last", "LastTimestamp = Timestamp"),
        caf.AggCount("Size"),
        caf.AggSum("Shares = Size", "Dollars"),
        caf.AggFirst("OpenPrice = Last"),
        caf.AggMax("HighPrice = Last"),
        caf.AggMin("LowPrice = Last"),
        caf.AggAvg("AvgPrice = Last"),
        caf.AggWAvg("Size", "WtdAvgPrice = Last"),
        caf.AggVar("VarPrice = Last"),
        caf.AggStd("StdPrice = Last"),
        caf.AggMed("MedianPrice = Last"),
        caf.AggPct(0.75, "Perc75Price = Last"),
    ), "Date", "USym")
# see the two keys -- Date & USym, so agg is within each unique pair
# note the ability to rename columns during the process
# observe the updateView of Dollars then used in the aggregation
# weighted average is nice syntax

# JOIN DATA FROM MULTIPLE TABLES
# IMPORTANT: For SQL developers -> joins in Deephaven are used to extend result sets
# by joining data from other tables, not so much for filtering.

## Joins that get used a lot

# IMPORTANT: NATURAL JOIN
# Vital to know: The right table of the join needs to have only one match
# based on the key(s).

# run these together
# note: smart quotes around "Last"/"Timestamp" (a SyntaxError) have been fixed
lastPriceTable = staticSource1.view("USym", "Last", "Timestamp")\
    .lastBy("USym")\
    .renameColumns("LastPrice = Last", "LastTime = Timestamp")
# creates a 3-column table of USym + last record of the other 2 cols

joinWithLastPrice = staticSource1.naturalJoin(lastPriceTable, "USym")
# arguments are (rightTableOfJoin, "JoinKey(s)")
# will have same number of rows as staticSource1 (left table)
# brings all non-key columns from lastPriceTable to staticSource1
# HEAVILY USED!
# conceptually similar to Excel vlookup()

specifyColsFromR = staticSource1.naturalJoin(lastPriceTable, "USym", "LastPrice")
# nearly identical to joinWithLastPrice example
# args are (rightTableOfJoin, "JoinKey(s)", "Cols from R table")

renameColsOnJoin = staticSource1\
    .naturalJoin(lastPriceTable, "USym", "LP = LastPrice, LastTime")
# nearly identical to specifyColsFromR example
# can rename a column on the join -- sometimes this is necessary
# if there would be 2 cols of the same name
# Note the placement of quotes

keysOfDiffNames = staticSource2.view("Ticker", "Close")\
    .naturalJoin(lastPriceTable, "Ticker = USym")
# note that Ticker in the L table is mapped as the key to USym in R
# this result has many null records, as there are many Tickers
# without matching USyms

# IMPORTANT
# Multiple keys!
# run these together
lastPriceTwoKeys = staticSource1.view("Date", "USym", "Last")\
    .lastBy("Date", "USym")\
    .renameColumns("LastPrice = Last")
# creates a 3-column table of LastPrice for each Date-USym pair
natJoinTwoKeys = staticSource1\
    .naturalJoin(lastPriceTwoKeys, "Date, USym", "LastPrice")
# arguments are (rightTableOfJoin, "JoinKey1, JoinKey2", "Col(s)")
# Note the placement of quotes

# IMPORTANT: AJ (AS-OF JOIN)
# As-of joins are the time series joins vital to the capital markets' use cases
# it is often wise to make sure the right table is sorted (based on the key)
# aj is designed to find "the exact match" of the key or "the record just before"
# for timestamp aj-keys, this means that time or the record just before

# NEEDED:
# MUST RUN "justAapl" query below to support all other aj examples!
justAapl = staticSource1.where("USym = `AAPL`").view("Timestamp", "AaplPrice = Last")
# just the AAPL trade rows

ajTimeSeriesJoin = staticSource1.aj(justAapl, "Timestamp")
# joins by looking at Timestamp in staticSource1 (left table)
# and finding exact match or "most adjacent before" Timestamp
# in justAapl (right table)

rajTimeSeriesJoin = staticSource1.raj(justAapl, "Timestamp")
# note the 'r' in front of the aj
# reverse aj finds the matching timestamp or the one after

ajPickAndNameCols = staticSource1\
    .aj(justAapl, "Timestamp", "AaplPrice, AaplTime = Timestamp")
# Note the placement of quotes

ajKeysDiffNames = staticSource1.dropColumns("Timestamp")\
    .aj(justAapl, "ExchangeTimestamp = Timestamp")
# ExchangeTimestamp from left table drives the join with Timestamp
# from the right table

# the fundamental below is used for TCA, slippage, simulation, etc.
ajMultipleKeysDiffNames = staticSource1.aj(
    staticSource1.updateView("Time10min = Timestamp + 10 * MINUTE"),
    "Date, USym, Timestamp = Time10min",
    "Price10Min = Last")
# join a series on the same series 10 minutes later
# Note the three keys -- matches Date, then USym...
# then does a time-series join on the last key

## Joins that get used much less

# exact joins require precisely one match in the right table for each key
lastPriceAug23 = staticSource1.where("Date = `2017-08-23`").lastBy("USym")
lastPriceAug24 = staticSource1.where("Date = `2017-08-24`").lastBy("USym")\
    .whereIn(lastPriceAug23, "USym")  # filters for USyms in lastPriceAug23
exactJoinEx = lastPriceAug23.exactJoin(lastPriceAug24, "USym", "NextClose = Last")

# join will find all matches from left in the right table
# if a match is not found, that row is omitted from the result
# if the right table has multiple matches, then multiple rows are in the result
lastPriceSS1 = staticSource1.lastBy("USym").view("USym", "Last")
lastPriceSS2 = staticSource2.lastBy("Ticker")\
    .view("USym = Ticker", "Price = Close")
firstPriceSS2 = staticSource2.firstBy("Ticker")\
    .view("USym = Ticker", "Price = Close")
mergeFirstAndLastSS2 = ttools.merge(firstPriceSS2, lastPriceSS2)
# the table has two rows per USym
joinExample = lastPriceSS1.join(mergeFirstAndLastSS2, "USym", "Price")
# note that the resultant set has two rows per USym

# EMPLOY OTHER METHODS

## Use columns as arrays and cells as variables
mostActiveUSym = staticSource1.countBy("N", "USym").sortDescending("N")
getAColumn = mostActiveUSym.getColumn("USym").getDirect()
# this creates a java array of the column's values (USym, in this case)
getARecord = mostActiveUSym.getColumn("USym").getDirect()[1]
# indexing starts at 0
getACell = mostActiveUSym.getColumn("USym").get(0)
# when to use each of the last two is beyond this basic introduction
# mostly relates to when iterating over an entire column
print(getAColumn)
print(getARecord)
print(getACell)

## Read and write csv
# IMPORTANT: It is very easy to import CSVs via the Code Studio UI
# and to export any Table to a CSV via the Table Widget
# (in Code Studios and Dashboard Tables).
# CSVs imported from the client need to be done via Code Studio UI.
# CSVs imported from a server-side directory should be done via the script below.
# NOT working examples -- the below provides syntax, but the filepaths are unknown
# readCSV("filepath", "delimiter")  # read CSV file on query server into table
# readHeaderlessCsv("filepath", "delimiter")
# writeCsv(table, "filePath")  # writes a Deephaven Table out as a CSV

## Write a Table to a Namespace
# though the below uses a namespace called ExNamespace, it's best practice
# for teams to establish a naming protocol for these directories

# create some tables to use in the example
aapl100 = staticSource1.where("USym = `AAPL`").head(100)
goog100 = staticSource1.where("USym = `GOOG`").head(100)
aapl100_wDlr = staticSource1.where("USym = `AAPL`").tail(100)\
    .update("Dollars = Last * Size")

# add local table from Code Studio to Deephaven servers
db.addTable("ExNamespace", "ExTable", aapl100)
seeTableJustAdded = db.i("ExNamespace", "ExTable").where()
# append to that table
db.appendTable("ExNamespace", "ExTable", goog100)
nowSeeAppendedTable = db.i("ExNamespace", "ExTable").where()
# remove and re-create a new table under that table name
db.replaceTable("ExNamespace", "ExTable", aapl100_wDlr)
nowSeeThatTable = db.i("ExNamespace", "ExTable").where()
# totally remove that table
# db.removeTable("ExNamespace", "ExTable")
# note the below now fails because the table does not exist
# thisWillFail = db.i("ExNamespace", "ExTable").where()

### Do Cum-Sum and Rolling Average
from deephaven import *

def hasOthers(array, target):
    """Return True if `array` contains any element different from `target`.

    Used below to detect when a trailing window spans more than one Ticker.
    """
    for x in array:
        if x != target:
            return True
    return False

makeArrays = staticSource2.updateView(
    "TickerArray10 = Ticker_.subArray(i-10, i-1)",
    "PriceArray10 = Close_.subArray(i-10, i-1)",
    "VolumeArray10 = Volume_.subArray(i-10, i-1)")

CumAndRoll10 = makeArrays.update(
    "ArrayHasOtherTickers = hasOthers.call(TickerArray10.toArray(), Ticker)",
    "CumSum10 = ArrayHasOtherTickers = false ? (int)sum(VolumeArray10) : NULL_INT",
    "RollAvg10 = ArrayHasOtherTickers = false ? avg(PriceArray10) : NULL_DOUBLE")

### Another Example of Creating a Rolling Sum
def rollingSum(rows, values):
    """Calculate a rolling sum from a java int array.

    :param rows: size of the rolling sum (i.e. number of rows to sum over)
    :param values: java int array of the values to be summed
    :return: java int array of the rolling sums, one per input row
    """
    calculations = jpy.array('int', values.size())  # result array
    sum_ = 0  # our running sum (avoid shadowing builtin `sum`)
    for i in range(values.size()):
        sum_ += values.get(i)
        if i >= rows:
            # subtract from the rolling sum when the window is full
            sum_ -= values.get(i - rows)
        calculations[i] = sum_
    return calculations

trades = db.t("LearnDeephaven", "StockTrades").where("Date=`2017-08-25`")\
    .view("Sym", "Size")\
    .headBy(100, "Sym")
symGroups = trades.by("Sym")
sums = symGroups.updateView("RollingSum=(int[]) rollingSum.call(20, Size)")
ungrouped = sums.ungroup()  # renamed from `ungroup` to avoid confusion with the method

### Setting up an EMA
# reference requisite libraries
from deephaven import *
from deephaven.DBTimeUtils import *
ByEmaSimple = jpy.get_type("com.illumon.numerics.movingaverages.ByEmaSimple")
BadDataBehavior = jpy.get_type("com.illumon.numerics.movingaverages.ByEma$BadDataBehavior")
MaMode = jpy.get_type("com.illumon.numerics.movingaverages.AbstractMa$Mode")
TimeUnit = jpy.get_type("java.util.concurrent.TimeUnit")

# configure EMA parameters
# note TIME below
ema_price_10min = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                              MaMode.TIME, 10.0, TimeUnit.MINUTES)
ema_price_60min = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                              MaMode.TIME, 60.0, TimeUnit.MINUTES)
# see TICK below
ema_price_100ticks = ByEmaSimple(BadDataBehavior.BD_SKIP, BadDataBehavior.BD_SKIP,
                                 MaMode.TICK, 100, None)

# calculate EMA within update (or updateView, select, view) method
withEmas = staticSource1.update(
    "EmaMin10=ema_price_10min.update(Timestamp, Last, USym)",
    "EmaMin60=ema_price_60min.update(Timestamp, Last, USym)",
    "EmaTick100=ema_price_100ticks.update(Timestamp, Last, USym)")
# notice the EMA is calculated by USym
# TradingHoursEma also exists, but is beyond the scope of this document.
# That can be tied to a business calendar.
# Datetime method diffBusinessNanos() can also be helpful for that.

### Use CumulativeUtil
# The CumulativeUtil class provides methods to simplify rolling and cumulative
# aggregations of double values
cu = jpy.get_type("com.illumon.iris.db.v2.utils.CumulativeUtil")
t = db.t("LearnDeephaven", "StockTrades")\
    .where("Date=`2017-08-25`")\
    .update("Volume=Last*Size")
t2 = t.sort("Timestamp")
CumulativeMarketSum = cu.cumSum(t2, "CumulativeVolume", "Volume")
CumulativeSumBySymbol = cu.cumSum(t, "Sym", "CumulativeVolume", "Volume")
CumulativeAvgBySymbol = cu.cumSum(t, "Sym", "CumulativeAvgVolume", "Volume")\
    .update("CumulativeAvgVolume=CumulativeAvgVolume/(ii+1)")

# When using these methods with intraday (ticking) tables, it must be ensured that
# appends of new rows are the only changes that happen to the source tables. This
# can be done using the assertAppendOnly method. If changes other than appends occur
# in the underlying table, an exception will be thrown, ending the query. This
# ensures that the results returned by CumulativeUtil methods are accurate.
ta = jpy.get_type("com.illumon.iris.db.tables.verify.TableAssertions")
tickingTable = db.i("DbInternal", "ProcessEventLog").where("Date=currentDateNy()")
appendOnlyTickingTable = ta.assertAppendOnly(tickingTable)
# CumulativeUtil methods would then be applied to appendOnlyTickingTable.
# USE NUMPY AND PANDAS
from deephaven import *
from deephaven import npy
from deephaven import tableToDataFrame
import numpy as np
import pandas as pd

# note these conversions are less immediately available than DH tables:
# - constrained by memory available
# - no updates -- ticking sources are snapshotted
pandasDF = tableToDataFrame(staticSource1.where("Date = `2017-08-25`"))

# do Pandas stuff to prove it is a Pandas DataFrame
print(type(pandasDF))
print(pandasDF.dtypes)
print(pandasDF.describe())

# if all the columns can be of the same data type,
# and if the data set is very large,
# it may be faster to make a Pandas DataFrame via NumPy
justNumbers = staticSource1.view("Last", "Size", "ExchangeId")
pandasDF_3Cols = pd.DataFrame(
    data=npy.numpy_slice(justNumbers, 0, justNumbers.size(), dtype=np.float32),
    columns=justNumbers.getDefinition().getColumnNamesArray()
)
print(type(pandasDF_3Cols))
print(pandasDF_3Cols.dtypes)

# get an example table to work with
aaplTrades = staticSource1.where("Date = `2017-08-25`", "USym = `AAPL`")

# IMPORTANT: Conversions from DH Tables to NumPy arrays are to
# one-dimensional arrays only!
# make numPy arrays of single columns npArray_Price = columnToNumpyArray(aaplTrades, "Last") npArray_TradeSize = columnToNumpyArray(aaplTrades, "Size") # do NumPy stuff print(type(npArray_Price)) print(npArray_Price.size) print(npArray_Price[10:20]) print(npArray_Price.max()) print(npArray_TradeSize.sum()) print(np.unique(npArray_TradeSize)) # multiply arrays npArray_Dollars = np.multiply(npArray_Price, npArray_TradeSize) # make a 3xN NumPy array np3Cols = np.stack((npArray_Price, npArray_TradeSize, npArray_Dollars), axis = 1) print(np3Cols.shape) # make a Pandas DF from the 3XN NumPy Array pandasDF_fromNumPy = pd.DataFrame(np3Cols, columns = ['Price', 'Shares', 'Dollars']) print(pandasDF_fromNumPy.aggregate(['sum', 'min', 'max', 'average', 'median'])) # turn Pandas DF into Deephaven Table dhTable = dataFrameToTable(pandasDF) # FORMAT TABLES ## Datetime Formatting timeFormatting = staticSource1.view("Time1 = Timestamp", "Time2 = Timestamp", "Time3 = Timestamp", "Time4 = Timestamp", "Time5 = Timestamp", "Time6 = Timestamp")\ .formatColumns("Time1=Date(`yyyy_MM_dd'T'HH-mm-ss.S z`)", "Time2 = Date(`yyyy-MM-dd'T'HH:mm t`)", "Time3 = Date(`HH:mm:ss`)", "Time4 = Date(`HH:mm:ss.SSSSSSSSS`)", "Time5 = Date(`EEE dd MMM yy HH:MM:ss`)", "Time6 = Date(`yyyy-MM-dd`)")\ .updateView("Time7_string = formatDate(Time6, TZ_NY)") ## Number Formatting numberFormatting = staticSource1.view("Last",\ "BigNum1 = Last * 1000", "BigNum2 = Last * 1000",\ "BigNum3 = Last * 1000", "Price1 = Last", "Price2 = Last",\ "Price3 = Last",\ "SmallNum1 = Last / 1000", "SmallNum2 = Last / 1000",\ "SmallNum3 = Last / 1000", "TinyNum = Last / 1_000_000_000")\ .formatColumns("BigNum1 = Decimal(`###,#00`)",\ "BigNum2 = Decimal(`##0.00`)",\ "BigNum3 = Decimal(`0.####E0`)",\ "Price1 = Decimal(`###,###.##`)",\ "Price2 = Decimal(`$###,##0.00`)",\ "SmallNum1 = Decimal(`##0.000000`)",\ "SmallNum2 = Decimal(`##0.00%`)",\ "SmallNum3 = Decimal(`##0%`)",\ "TinyNum = Decimal(`0.00E0`)") # run these next three tables 
# (these next three tables run together)
# create table for the example
numTable = staticSource1.view("Last",
    "Random1 = Last * (new Random().nextDouble())",
    "Random2 = Last * (new Random().nextDouble())",
    "PriceLong = (long)Last", "PriceInt = (int)Last",
    "BigNumFloat = (float)(Last * 1000)")

# efficient syntax to round many float and double columns across a table
from deephaven import *
roundColumns = ttools.roundDecimalColumns(numTable)
roundMostColums = ttools.roundDecimalColumnsExcept(numTable, "Last")

## Color Formatting
justColors = staticSource1\
    .formatColumns("Timestamp = `#90F060`", "SecurityType = LIGHTBLUE",
        "Exchange = colorRGB(247,204,0)",
        "USym = colorRGB(247,204,204)",
        "Sym = bgfg(colorRGB(57,43,128),colorRGB(243,247,122))",
        "Last = bgfga(MEDIUMVIOLETRED)",
        "Size = colorHSL(0, 24, 36)")

# need this for the last part of conditionalColors script
from deephaven import *
colorMe = DistinctFormatter()

conditionalColors = staticSource1.where("Date = `2017-08-25`")\
    .updateView("RowMod10 = i % 10")\
    .formatColumnWhere("Source", "SaleCondition = `@FTI`", "DEEP_RED")\
    .formatColumnWhere("USym", "Last > Last_[i-1]", "LIMEGREEN")\
    .formatRowWhere("ExchangeId % 10 = 0", "colorHSL(260, 94, 52)")\
    .formatColumns("Size = (Size % 5 !=0) ? ORANGE : BLACK",
        "SecurityType = (Exchange = `Nasdaq` && SaleCondition = `@TI`) ? colorRGB(253, 31, 203) : colorRGB(171, 254, 42)",
        "RowMod10 = heatmap(RowMod10, 1.0, 10.0, BRIGHT_GREEN, BRIGHT_RED)",
        "Exchange = colorMe.getColor(Exchange)")
# list of canned colors found here:
# https://docs.deephaven.io/latest/Content/User/misc/colorsAll.htm?Highlight=color

# one can formatColumns() for numbers and colors together
numberAndColor = staticSource1.formatColumns("Last = Decimal(`##0.00`)",
    "Last = (Last > Last_[i-1]) ? FUCHSIA : GREY")

# PLOT PROGRAMMATICALLY
# Substantial documentation about plotting exists
# https://docs.deephaven.io/latest/Content/writeQueries/plot/overview.htm
# The below intends to show the basics
# in particular, everything about styling and labeling is omitted from the below
# https://docs.deephaven.io/latest/Content/writeQueries/plot/components/index.htm
# https://docs.deephaven.io/latest/Content/writeQueries/plot/visual.htm#Visual_Formatting
from deephaven import *
from deephaven import Calendars
from deephaven import Plot

# create some tables to use in the plot examples
cal = Calendars.calendar("USNYSE")
fourUSym = staticSource1.where("USym in `GOOG`,`AAPL`,`CSCO`, `MSFT`",
        "cal.isBusinessTime(ExchangeTimestamp)", "SaleCondition!=`@W`",
        "inRange(Size, 100, 10_000)")\
    .sort("Date", "USym", "ExchangeTimestamp")
fourUSymOneDay = fourUSym.where("Date = `2017-08-25`")
fourUSymBest = fourUSymOneDay\
    .naturalJoin(fourUSymOneDay.firstBy("USym"), "USym", "Open = Last")\
    .updateView("PercChange = Last / Open - 1")\
    .aj(fourUSymOneDay.where("USym = `MSFT`"),
        "ExchangeTimestamp", "MsftPrice = Last")
justAaplMultiday = fourUSym.where("USym = `AAPL`")
justAaplBest = fourUSymBest.where("USym = `AAPL`")
dlrVolume = staticSource2.where("Ticker in `AAPL`, `CSCO`, `GOOG`, `MSFT`")\
    .update("DlrVolume = Close * Volume",
        "Month = formatDate(EODTimestamp, TZ_NY).substring(0,7)")

# Time series plots
timeSeriesPlot = Plot.plot("", justAaplBest, "ExchangeTimestamp", "Last").show()
# note .show()

timeSeries2axes = Plot.plot("MSFT Series", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("GOOG Series", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "Last").show()
# note the first parameter .plot("this-one"... could not be ""
# see .twinX()
timeSeries3axes = Plot.plot("MSFT Series", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("GOOG Series", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "Last")\
    .twinX()\
    .plot("CSCO Series", fourUSymBest.where("USym = `CSCO`"),
        "ExchangeTimestamp", "Last").show()
# can do more than 3

# Multiple series on the same axis
timeSeriesSameAxis = Plot.plot("MSFT %-Chge", fourUSymBest.where("USym = `MSFT`"),
        "ExchangeTimestamp", "PercChange")\
    .plot("GOOG %-Chge", fourUSymBest.where("USym = `GOOG`"),
        "ExchangeTimestamp", "PercChange").show()
# no .twinX() in this case

# Bar chart
barChart = Plot.plot("", dlrVolume.where("Ticker = `AAPL`"),
        "EODTimestamp", "DlrVolume")\
    .plotStyle("Bar").show()

# Plot-by-some key
plotBySeries = Plot.plotBy("Percent Change", fourUSymBest, "ExchangeTimestamp",
    "PercChange", "USym").show()
# see that the last overload "USym", in this case, is the key
# in this case this will create 4 series for the respective 4 USyms

# Stacked Area
stackedArea = Plot.plotBy("Dlr Volume", dlrVolume, "EODTimestamp",
        "DlrVolume", "Ticker")\
    .plotStyle("stacked_area").show()

# Area graph
viewAsArea = timeSeriesSameAxis.plotStyle("Area").show()
# see that it references the plotting object of the previous script

# Business time
uglyRawMultiday = Plot.plot("", justAaplMultiday, "ExchangeTimestamp", "Last")\
    .show()
# use xBusinessTime() to get rid of overnight and weekend time
prettyWithBizTimeConstraint = Plot.plot("", justAaplMultiday, "ExchangeTimestamp",
        "Last")\
    .xBusinessTime().show()
# this relies on the prevailing biz-time calendar that is set

# Scatter
scatterPlot = Plot.plot("", justAaplBest.where("i % 10 = 0"), "MsftPrice", "Last")\
    .plotStyle("scatter").show()
# Note the manual down-sampling above
# Without it, for large tables (this one is 87k records) the UI will
# make you choose whether you really want to graph so many points
# Deephaven smart-auto-downsampling is only for line plots
# Histogram
plotHistogram = Plot.histPlot("", justAaplBest, "Last", 20).show()
catHistogramBySym = Plot.catHistPlot("", fourUSymBest, "USym").show()
# de facto count of rows

# Open-high-low-close
plotOHLC = Plot.ohlcPlot("", staticSource2.where("Ticker = `AAPL`"), "EODTimestamp",
    "Open", "High", "Low", "Close").show()
# syntax exists to profile data into time bins with OHLC

# Error Bar Plot
errorBarTable = justAaplBest\
    .updateView("TimeBin=upperBin(ExchangeTimestamp, 15 * MINUTE)")\
    .by(caf.AggCombo(caf.AggAvg("AvgPrice = Last"),
        caf.AggStd("StdPrice = Last")), "TimeBin")\
    .updateView("Low = AvgPrice - StdPrice", "Hi = AvgPrice + StdPrice")
errorBarPlot = Plot.errorBarY("", errorBarTable, "TimeBin",
    "AvgPrice", "Low", "Hi").show()

## One-click plotting
# One-click plotting allows a script writer to designate a key that users
# can enter into a one-click or linker experience, thereby toggling
# the plot to filter on the fly
# Users will need to add an Input Filter or configure the Linker to their UI
# to manipulate the One-Click plots.
# See https://docs.deephaven.io/latest/Content/web/controls.htm
oneClickEx = Plot.oneClick(fourUSymBest, "USym")
# this maps the one-click experience to the key of USym (in this case)
# there is no table visible from this
# in DH-speak it saves the OneClick SelectableDataSet to the variable
oneClickPlot = Plot.plot("USym", oneClickEx, "ExchangeTimestamp", "Last").show()

# Another one-click example
datasetHolder2 = Plot.oneClick(dlrVolume.view("Month", "EODTimestamp", "DlrVolume")
    .sumBy("Month", "EODTimestamp").sort("EODTimestamp"), "Month")
# FIX: original had a stray trailing line-continuation after the statement above
# and referenced an undefined 'datasetHolder3' below; use datasetHolder2
oneClickPlot2 = Plot.plot("Dlr Volume", datasetHolder2, "EODTimestamp", "DlrVolume")\
    .plotStyle("Area").show()
# change the Input Filter in the UI to 'Month'
# then, for example, type in that box 2017-03