Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FILTER_DISTINCT function. #589

Merged
merged 6 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions docs/user/BuiltInFunctions.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,6 @@ CAST(<<'a', 'b'>> AS bag) -- <<'a', 'b'>> (REPL does not display << >> and comma

### CHAR_LENGTH, CHARACTER_LENGTH



Counts the number of characters in the specified string, where 'character' is defined as a single unicode code point.

*Note:* `CHAR_LENGTH` and `CHARACTER_LENGTH` are synonyms.
Expand Down Expand Up @@ -455,9 +453,31 @@ EXTRACT(TIMEZONE_MINUTE FROM TIME WITH TIME ZONE '23:12:59-08:30') -- -30
```
*Note* that `timezone_hour` and `timezone_minute` are **not supported** for the `DATE` and `TIME` (without time zone) types.

### `FILTER_DISTINCT`

Signature
: `FILTER_DISTINCT: Container -> Bag`

Header
: `FILTER_DISTINCT(c)`

Purpose
: Returns a bag of distinct values contained within a bag, list, sexp, or struct. If the container is a struct,
the field names are not considered.

Examples
:

```sql
FILTER_DISTINCT([0, 0, 1]) -- <<0, 1>>
FILTER_DISTINCT(<<0, 0, 1>>) -- <<0, 1>>
FILTER_DISTINCT(SEXP(0, 0, 1)) -- <<0, 1>>
FILTER_DISTINCT({'a': 0, 'b': 0, 'c': 1}) -- <<0, 1>>
```

### LOWER

Given a string convert all upper case characters to lower case characters.
Given a string convert all upper case characters to lower case characters.

Signature
: `LOWER: String -> String`
Expand Down
27 changes: 27 additions & 0 deletions lang/src/org/partiql/lang/eval/builtins/BuiltinFunctions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@
package org.partiql.lang.eval.builtins

import com.amazon.ion.system.IonSystemBuilder
import org.partiql.lang.eval.DEFAULT_COMPARATOR
import org.partiql.lang.eval.EvaluationSession
import org.partiql.lang.eval.ExprFunction
import org.partiql.lang.eval.ExprValue
import org.partiql.lang.eval.ExprValueFactory
import org.partiql.lang.eval.stringValue
import org.partiql.lang.eval.unnamedValue
import org.partiql.lang.types.AnyOfType
import org.partiql.lang.types.FunctionSignature
import org.partiql.lang.types.StaticType
import org.partiql.lang.types.UnknownArguments
import java.util.TreeSet

internal const val DYNAMIC_LOOKUP_FUNCTION_NAME = "\$__dynamic_lookup__"

Expand All @@ -42,6 +45,7 @@ internal fun createBuiltinFunctions(valueFactory: ExprValueFactory) =
createCharacterLength("character_length", valueFactory),
createCharacterLength("char_length", valueFactory),
createUtcNow(valueFactory),
createFilterDistinct(valueFactory),
DateAddExprFunction(valueFactory),
DateDiffExprFunction(valueFactory),
ExtractExprFunction(valueFactory),
Expand Down Expand Up @@ -79,6 +83,29 @@ internal fun createUtcNow(valueFactory: ExprValueFactory): ExprFunction = object
valueFactory.newTimestamp(session.now)
}

/**
 * Creates the `filter_distinct` built-in function.
 *
 * The function accepts a single container argument (bag, list, s-expression or struct) and returns a bag
 * containing the first occurrence of each distinct element, in the order in which the argument's elements
 * are iterated. Two elements are considered duplicates when [DEFAULT_COMPARATOR] orders them as equal.
 *
 * @param valueFactory factory used to construct the resulting bag.
 * @return an [ExprFunction] implementing `filter_distinct`.
 */
internal fun createFilterDistinct(valueFactory: ExprValueFactory): ExprFunction = object : ExprFunction {
    override val signature = FunctionSignature(
        "filter_distinct",
        listOf(StaticType.unionOf(StaticType.BAG, StaticType.LIST, StaticType.SEXP, StaticType.STRUCT)),
        returnType = StaticType.BAG
    )

    override fun callWithRequired(session: EvaluationSession, required: List<ExprValue>): ExprValue {
        val argument = required.first()
        return valueFactory.newBag(
            sequence {
                // We cannot use a [HashSet] here because [ExprValue] does not implement .equals() and .hashCode()
                // Review note: it would be useful for PartiQL to have the equivalent of Ion Hash for use cases
                // like this.
                //
                // The set of already-seen values is created INSIDE the builder block. The block is re-executed
                // every time the resulting sequence is iterated; if the set lived outside the block, a second
                // iteration would find every element already recorded and would yield nothing.
                val encountered = TreeSet(DEFAULT_COMPARATOR)
                argument.asSequence().forEach {
                    if (!encountered.contains(it)) {
                        // Stored through unnamedValue(); presumably so the retained entry does not carry a
                        // struct field name -- TODO confirm against the definition of unnamedValue().
                        encountered.add(it.unnamedValue())
                        yield(it)
                    }
                }
            }
        )
    }
}
internal fun createCharacterLength(name: String, valueFactory: ExprValueFactory): ExprFunction =
object : ExprFunction {
override val signature: FunctionSignature
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package org.partiql.lang.eval.builtins.functions

import org.junit.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.ArgumentsSource
import org.partiql.lang.errors.ErrorCode
import org.partiql.lang.errors.Property
import org.partiql.lang.eval.EvaluatorTestBase
import org.partiql.lang.eval.builtins.ExprFunctionTestCase
import org.partiql.lang.eval.builtins.checkInvalidArity
import org.partiql.lang.util.ArgumentsProviderBase
import org.partiql.lang.util.propertyValueMapOf
import org.partiql.lang.util.to

/**
 * Evaluation tests for the `filter_distinct` built-in function.
 *
 * Covers three areas: queries that are expected to succeed, queries whose argument is of an unsupported
 * type, and calls with the wrong number of arguments.
 *
 * NOTE(review): several identifiers below still carry a `ToString` prefix ([ToStringPassCases],
 * `toStringInvalidArgumentTests`); they look like leftovers copied from another function's test and
 * could be renamed to mention `filter_distinct` -- confirm nothing else refers to them first.
 */
class FilterDistinctEvaluationTest : EvaluatorTestBase() {
// Pass test cases
// Each case's query is evaluated and compared against the case's expected legacy-mode result.
@ParameterizedTest
@ArgumentsSource(ToStringPassCases::class)
fun runPassTests(testCase: ExprFunctionTestCase) =
runEvaluatorTestCase(query = testCase.source, expectedResult = testCase.expectedLegacyModeResult)

// We rely on the built-in [DEFAULT_COMPARATOR] for the actual definition of equality, which is not being tested
// here.
class ToStringPassCases : ArgumentsProviderBase() {
override fun getParameters(): List<Any> = listOf(

// These four tests ensure we can accept lists, bags, s-expressions and structs
ExprFunctionTestCase("filter_distinct([0, 0, 1])", "[0, 1]"), // list
ExprFunctionTestCase("filter_distinct(<<0, 0, 1>>)", "[0, 1]"), // bag
ExprFunctionTestCase("filter_distinct(SEXP(0, 0, 1))", "[0, 1]"), // s-exp
ExprFunctionTestCase("filter_distinct({'a': 0, 'b': 0, 'c': 1})", "[0, 1]"), // struct

// Some "smoke tests" to ensure the basic plumbing is working right.
ExprFunctionTestCase("filter_distinct(['foo', 'foo', 1, 1, `symbol`, `symbol`])", "[\"foo\", 1, symbol]"),
ExprFunctionTestCase("filter_distinct([{ 'a': 1 }, { 'a': 1 }, { 'a': 1 }])", "[{ 'a': 1 }]"),
ExprFunctionTestCase("filter_distinct([[1, 1], [1, 1], [2, 2]])", "[[1,1], [2, 2]]"),
)
}

// Error test cases: Invalid arguments
// `source` is the query to evaluate; `actualArgumentType` is the type name expected to be reported in the
// error context for the offending argument.
data class InvalidArgTestCase(
val source: String,
val actualArgumentType: String
)

// Every invalid-argument case must fail with EVALUATOR_INCORRECT_TYPE_OF_ARGUMENTS_TO_FUNC_CALL and the error
// context below; the expected permissive-mode result is MISSING.
// NOTE(review): the leading `1, 1` passed to propertyValueMapOf are presumably the line and column of the
// error -- confirm against the helper's signature.
@ParameterizedTest
@ArgumentsSource(InvalidArgCases::class)
fun toStringInvalidArgumentTests(testCase: InvalidArgTestCase) = runEvaluatorErrorTestCase(
query = testCase.source,
expectedErrorCode = ErrorCode.EVALUATOR_INCORRECT_TYPE_OF_ARGUMENTS_TO_FUNC_CALL,
expectedErrorContext = propertyValueMapOf(
1, 1,
Property.FUNCTION_NAME to "filter_distinct",
Property.EXPECTED_ARGUMENT_TYPES to "BAG, LIST, SEXP, or STRUCT",
Property.ACTUAL_ARGUMENT_TYPES to testCase.actualArgumentType,
Property.ARGUMENT_POSITION to 1
),
expectedPermissiveModeResult = "MISSING",
)

// Non-container arguments paired with the type name expected in the error context.
class InvalidArgCases : ArgumentsProviderBase() {
override fun getParameters(): List<Any> = listOf(
InvalidArgTestCase("filter_distinct(1)", "INT"),
InvalidArgTestCase("filter_distinct(1.0)", "DECIMAL"),
InvalidArgTestCase("filter_distinct('foo')", "STRING"),
InvalidArgTestCase("filter_distinct(`some_symbol`)", "SYMBOL"),
InvalidArgTestCase("filter_distinct(`{{ '''a clob''' }}`)", "CLOB"),
)
}

// filter_distinct takes exactly one argument.
@Test
fun invalidArityTest() = checkInvalidArity(funcName = "filter_distinct", maxArity = 1, minArity = 1)
}