Skip to content

Commit

Permalink
Add FILTER_DISTINCT function
Browse files Browse the repository at this point in the history
  • Loading branch information
dlurton committed Apr 29, 2022
1 parent 41e6384 commit 405ae74
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 3 deletions.
26 changes: 23 additions & 3 deletions docs/user/BuiltInFunctions.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,6 @@ CAST(<<'a', 'b'>> AS bag) -- <<'a', 'b'>> (REPL does not display << >> and comma

### CHAR_LENGTH, CHARACTER_LENGTH



Counts the number of characters in the specified string, where 'character' is defined as a single unicode code point.

*Note:* `CHAR_LENGTH` and `CHARACTER_LENGTH` are synonyms.
Expand Down Expand Up @@ -455,9 +453,31 @@ EXTRACT(TIMEZONE_MINUTE FROM TIME WITH TIME ZONE '23:12:59-08:30') -- -30
```
*Note* that `timezone_hour` and `timezone_minute` are **not supported** for the `DATE` and `TIME` (without time zone) types.

### `FILTER_DISTINCT`

Signature
: `FILTER_DISTINCT: Container -> Bag`

Header
: `FILTER_DISTINCT(c)`

Purpose
: Returns a bag of the distinct values contained within a bag, list, sexp, or struct. If the container is a struct,
only the field values are considered; the field names are ignored.

Examples
:

```sql
FILTER_DISTINCT([0, 0, 1]) -- <<0, 1>>
FILTER_DISTINCT(<<0, 0, 1>>) -- <<0, 1>>
FILTER_DISTINCT(SEXP(0, 0, 1)) -- <<0, 1>>
FILTER_DISTINCT({'a': 0, 'b': 0, 'c': 1}) -- <<0, 1>>
```

### LOWER

Given a string convert all upper case characters to lower case characters.
Given a string convert all upper case characters to lower case characters.

Signature
: `LOWER: String -> String`
Expand Down
101 changes: 101 additions & 0 deletions lang/src/org/partiql/lang/eval/builtins/DynamicLookupExprFunction.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package org.partiql.lang.eval.builtins

import org.partiql.lang.errors.ErrorCode
import org.partiql.lang.eval.BindingCase
import org.partiql.lang.eval.BindingName
import org.partiql.lang.eval.EvaluationException
import org.partiql.lang.eval.EvaluationSession
import org.partiql.lang.eval.ExprFunction
import org.partiql.lang.eval.ExprValue
import org.partiql.lang.eval.ExprValueType
import org.partiql.lang.eval.physical.throwUndefinedVariableException
import org.partiql.lang.eval.stringValue
import org.partiql.lang.types.FunctionSignature
import org.partiql.lang.types.StaticType
import org.partiql.lang.types.VarargFormalParameter

/**
 * Resolves a variable dynamically at evaluation time.
 *
 * This function is not meant to be invoked by query authors (its name is deliberately chosen so that it does not
 * collide with the names of custom functions). Instead, the query planner injects call sites to it wherever an
 * undefined variable has to be resolved dynamically, which provides a migration path for legacy customers that
 * depend on this behavior.
 *
 * Required arguments, in order:
 *
 * 1. the variable name
 * 2. the case sensitivity (`case_sensitive` or `case_insensitive`)
 * 3. the lookup strategy (`locals_then_globals` or `globals_then_locals`)
 *
 * They are followed by a variadic list of locations to be searched. The variadic arguments must be of type `any`
 * because the planner doesn't yet have knowledge of static types and therefore cannot filter out local variable
 * types that are not structs; locations that are not structs are skipped while searching.
 */
class DynamicLookupExprFunction : ExprFunction {
    override val signature: FunctionSignature
        get() = FunctionSignature(
            name = DYNAMIC_LOOKUP_FUNCTION_NAME,
            // In order: variable name, case sensitivity, lookup strategy.
            requiredParameters = listOf(StaticType.SYMBOL, StaticType.SYMBOL, StaticType.SYMBOL),
            variadicParameter = VarargFormalParameter(StaticType.ANY, 0..Int.MAX_VALUE),
            returnType = StaticType.ANY
        )

    /**
     * Looks up the requested variable in the session globals and in the struct locations passed as [variadic],
     * in the order dictated by the lookup strategy, and returns the first binding found.
     *
     * Throws an [EvaluationException] when the case sensitivity or lookup strategy argument is not recognized,
     * or (through [throwUndefinedVariableException]) when no binding is found anywhere.
     */
    override fun callWithVariadic(
        session: EvaluationSession,
        required: List<ExprValue>,
        variadic: List<ExprValue>
    ): ExprValue {
        val name = required[0].stringValue()
        val bindingCase = parseCaseSensitivity(required[1].stringValue())
        val bindingName = BindingName(name, bindingCase)
        val globalsFirst = isGlobalsFirst(required[2].stringValue())

        val resolved = if (globalsFirst) {
            session.globals[bindingName] ?: searchLocals(variadic, bindingName)
        } else {
            searchLocals(variadic, bindingName) ?: session.globals[bindingName]
        }

        // We don't know the metas inside ExprFunction implementations. The ThunkFactory error handlers
        // should add line & col info to the exception & rethrow anyway.
        return resolved ?: throwUndefinedVariableException(bindingName, metas = null)
    }

    /** Translates the case sensitivity argument into a [BindingCase], rejecting unknown values. */
    private fun parseCaseSensitivity(text: String): BindingCase =
        when (text) {
            "case_sensitive" -> BindingCase.SENSITIVE
            "case_insensitive" -> BindingCase.INSENSITIVE
            else -> throw EvaluationException(
                message = "Invalid case sensitivity: $text",
                errorCode = ErrorCode.INTERNAL_ERROR,
                internal = true
            )
        }

    /** Returns `true` when the lookup strategy argument asks for globals to be searched before locals. */
    private fun isGlobalsFirst(text: String): Boolean =
        when (text) {
            "locals_then_globals" -> false
            "globals_then_locals" -> true
            else -> throw EvaluationException(
                message = "Invalid lookup strategy: $text",
                errorCode = ErrorCode.INTERNAL_ERROR,
                internal = true
            )
        }

    /**
     * Returns the first non-null binding for [bindingName] among the struct values in [possibleLocations],
     * or `null` if there is none. Non-struct locations are ignored. Evaluation is lazy, so locations after the
     * first match are not consulted.
     */
    private fun searchLocals(possibleLocations: List<ExprValue>, bindingName: BindingName) =
        possibleLocations.asSequence()
            .filter { it.type == ExprValueType.STRUCT }
            .mapNotNull { it.bindings[bindingName] }
            .firstOrNull()
}
31 changes: 31 additions & 0 deletions lang/src/org/partiql/lang/eval/physical/UndefinedVariableUtil.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.partiql.lang.eval.physical

import com.amazon.ionelement.api.MetaContainer
import org.partiql.lang.errors.ErrorCode
import org.partiql.lang.errors.Property
import org.partiql.lang.errors.UNBOUND_QUOTED_IDENTIFIER_HINT
import org.partiql.lang.eval.BindingCase
import org.partiql.lang.eval.BindingName
import org.partiql.lang.eval.EvaluationException
import org.partiql.lang.eval.errorContextFrom
import org.partiql.lang.util.propertyValueMapOf

/**
 * Throws the [EvaluationException] that reports [bindingName] as having no binding.
 *
 * A case-sensitive name is reported with [ErrorCode.EVALUATOR_QUOTED_BINDING_DOES_NOT_EXIST] and has
 * [UNBOUND_QUOTED_IDENTIFIER_HINT] appended to the message; a case-insensitive name is reported with
 * [ErrorCode.EVALUATOR_BINDING_DOES_NOT_EXIST] and no hint.
 *
 * The error context is derived from [metas] when they are supplied, otherwise it starts out empty; in both cases
 * [Property.BINDING_NAME] is set to the name that could not be resolved.
 */
internal fun throwUndefinedVariableException(
    bindingName: BindingName,
    metas: MetaContainer?
): Nothing {
    val isCaseSensitive = when (bindingName.bindingCase) {
        BindingCase.SENSITIVE -> true
        BindingCase.INSENSITIVE -> false
    }
    val errorCode = if (isCaseSensitive) {
        ErrorCode.EVALUATOR_QUOTED_BINDING_DOES_NOT_EXIST
    } else {
        ErrorCode.EVALUATOR_BINDING_DOES_NOT_EXIST
    }
    val hint = if (isCaseSensitive) " $UNBOUND_QUOTED_IDENTIFIER_HINT" else ""

    val errorContext = metas?.let { errorContextFrom(it) } ?: propertyValueMapOf()
    errorContext[Property.BINDING_NAME] = bindingName.name

    throw EvaluationException(
        message = "No such binding: ${bindingName.name}.$hint",
        errorCode = errorCode,
        errorContext = errorContext,
        internal = false
    )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package org.partiql.lang.eval.builtins.functions

import org.junit.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.ArgumentsSource
import org.partiql.lang.errors.ErrorCode
import org.partiql.lang.errors.Property
import org.partiql.lang.eval.EvaluatorTestBase
import org.partiql.lang.eval.builtins.ExprFunctionTestCase
import org.partiql.lang.eval.builtins.checkInvalidArity
import org.partiql.lang.util.ArgumentsProviderBase
import org.partiql.lang.util.propertyValueMapOf
import org.partiql.lang.util.to

/**
 * Evaluation tests for the `filter_distinct` built-in function: accepted container types, a few smoke tests of
 * de-duplication, rejection of non-container arguments, and arity checking.
 */
class FilterDistinctEvaluationTest : EvaluatorTestBase() {
    // Pass test cases
    @ParameterizedTest
    @ArgumentsSource(FilterDistinctPassCases::class)
    fun runPassTests(testCase: ExprFunctionTestCase) =
        runEvaluatorTestCase(query = testCase.source, expectedResult = testCase.expectedLegacyModeResult)

    // We rely on the built-in [DEFAULT_COMPARATOR] for the actual definition of equality, which is not being tested
    // here.
    class FilterDistinctPassCases : ArgumentsProviderBase() {
        override fun getParameters(): List<Any> = listOf(

            // These four tests ensure we can accept lists, bags, s-expressions and structs
            ExprFunctionTestCase("filter_distinct([0, 0, 1])", "[0, 1]"), // list
            ExprFunctionTestCase("filter_distinct(<<0, 0, 1>>)", "[0, 1]"), // bag
            ExprFunctionTestCase("filter_distinct(SEXP(0, 0, 1))", "[0, 1]"), // s-exp
            ExprFunctionTestCase("filter_distinct({'a': 0, 'b': 0, 'c': 1})", "[0, 1]"), // struct

            // Some "smoke tests" to ensure the basic plumbing is working right.
            ExprFunctionTestCase("filter_distinct(['foo', 'foo', 1, 1, `symbol`, `symbol`])", "[\"foo\", 1, symbol]"),
            ExprFunctionTestCase("filter_distinct([{ 'a': 1 }, { 'a': 1 }, { 'a': 1 }])", "[{ 'a': 1 }]"),
            ExprFunctionTestCase("filter_distinct([[1, 1], [1, 1], [2, 2]])", "[[1,1], [2, 2]]"),
        )
    }

    // Error test cases: Invalid arguments
    /**
     * A query that passes a non-container argument to `filter_distinct`, together with the argument type name
     * expected to be reported in the error context.
     */
    data class InvalidArgTestCase(
        val source: String,
        val actualArgumentType: String
    )

    @ParameterizedTest
    @ArgumentsSource(InvalidArgCases::class)
    fun filterDistinctInvalidArgumentTests(testCase: InvalidArgTestCase) = runEvaluatorErrorTestCase(
        query = testCase.source,
        expectedErrorCode = ErrorCode.EVALUATOR_INCORRECT_TYPE_OF_ARGUMENTS_TO_FUNC_CALL,
        expectedErrorContext = propertyValueMapOf(
            1, 1,
            Property.FUNCTION_NAME to "filter_distinct",
            Property.EXPECTED_ARGUMENT_TYPES to "BAG, LIST, SEXP, or STRUCT",
            Property.ACTUAL_ARGUMENT_TYPES to testCase.actualArgumentType,
            Property.ARGUMENT_POSITION to 1
        ),
        expectedPermissiveModeResult = "MISSING",
    )

    class InvalidArgCases : ArgumentsProviderBase() {
        override fun getParameters(): List<Any> = listOf(
            InvalidArgTestCase("filter_distinct(1)", "INT"),
            InvalidArgTestCase("filter_distinct(1.0)", "DECIMAL"),
            InvalidArgTestCase("filter_distinct('foo')", "STRING"),
            InvalidArgTestCase("filter_distinct(`some_symbol`)", "SYMBOL"),
            InvalidArgTestCase("filter_distinct(`{{ '''a clob''' }}`)", "CLOB"),
        )
    }

    @Test
    fun invalidArityTest() = checkInvalidArity(funcName = "filter_distinct", maxArity = 1, minArity = 1)
}

0 comments on commit 405ae74

Please sign in to comment.