Skip to content

Commit

Permalink
trino: Add an experimental FARM_FINGERPRINT UDF
Browse files Browse the repository at this point in the history
  • Loading branch information
emk committed Nov 7, 2023
1 parent 6c65082 commit 0645dca
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 3 deletions.
6 changes: 4 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{
"editor.formatOnSave": true
}
"editor.formatOnSave": true,
"java.compile.nullAnalysis.mode": "automatic",
"java.configuration.updateBuildConfiguration": "automatic"
}
1 change: 1 addition & 0 deletions java/trino-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target/
2 changes: 2 additions & 0 deletions java/trino-plugin/.tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
java adoptopenjdk-17.0.9+9
maven 3.9.5
39 changes: 39 additions & 0 deletions java/trino-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Experimental Trino Plugin

Install the [`asdf` version manager][asdf] and the plugins needed for Java:

```sh
asdf plugin-add java https://github.com/halcyon/asdf-java.git
asdf plugin-add maven https://github.com/halcyon/asdf-maven.git
```

Then install Java and Maven using the versions specified in `.tool-versions`:

```sh
asdf install
```

You should be able to "package" the plugin into `./target/` using:

```sh
mvn package
```

Then you can run Trino with the plugin using:

```sh
docker run --name trino -d --mount type=bind,source="$(pwd)"/target,target=/usr/lib/trino/plugin/joinery -p 8080:8080 trinodb/trino
```

This should give you a working `FARM_FINGERPRINT` function, which you can test
as follows:

```txt
❯ docker exec -it trino trino
trino> select farm_fingerprint('Hello');
_col0
----------------------
-3042045079152025465
```

[asdf]: https://asdf-vm.com/
105 changes: 105 additions & 0 deletions java/trino-plugin/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>ai.faraday.joinery</groupId>
<artifactId>trino-plugin</artifactId>
<version>1.0-SNAPSHOT</version>

<name>trino-plugin</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>32.1.3-jre</version>
</dependency>
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-spi</artifactId>
<version>429</version>
<!-- <scope>provided</scope> -->
</dependency>
</dependencies>

<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
<!-- Copy dependencies into ./target for now so we can mount the
whole directory into a Trino Docker container. -->
<plugins>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<!-- Originally this was "install". -->
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package ai.faraday.joinery;

import java.nio.charset.StandardCharsets;

import com.google.common.hash.Hashing;

import io.airlift.slice.Slice;
import io.trino.spi.function.Description;
import io.trino.spi.function.ScalarFunction;
import io.trino.spi.function.SqlNullable;
import io.trino.spi.function.SqlType;
import io.trino.spi.type.StandardTypes;

/**
 * Trino scalar function {@code farm_fingerprint}: a 64-bit FarmHash
 * fingerprint of a {@code VARCHAR} value, computed with Guava's
 * {@code Hashing.farmHashFingerprint64()}.
 *
 * <p>This class only hosts static function implementations and is not meant
 * to be instantiated.
 */
public final class FarmFingerprintFunction {
    private FarmFingerprintFunction() {
        // Static holder for annotated scalar functions; no instances.
    }

    /**
     * Computes the FarmHash Fingerprint64 of the given string.
     *
     * <p>The slice's bytes are hashed directly instead of being decoded to a
     * {@code String} and re-encoded as UTF-8. For well-formed UTF-8 input the
     * result is identical to hashing the decoded string, but this avoids an
     * intermediate allocation and keeps malformed byte sequences from being
     * rewritten by the decoder before hashing.
     *
     * @param string the value to fingerprint, or {@code null} for SQL NULL
     * @return the fingerprint as a signed 64-bit integer, or {@code null}
     *         when {@code string} is {@code null}
     */
    @ScalarFunction("farm_fingerprint")
    @Description("Returns FARM_FINGERPRINT of the given string")
    @SqlNullable
    @SqlType(StandardTypes.BIGINT)
    public static Long farmFingerprint(
            @SqlNullable @SqlType(StandardTypes.VARCHAR) Slice string) {
        if (string == null) {
            // NULL in, NULL out.
            return null;
        }
        return Hashing.farmHashFingerprint64().hashBytes(string.getBytes()).asLong();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package ai.faraday.joinery;

import java.util.Set;

import com.google.common.collect.ImmutableSet;

/**
 * Trino plugin that exposes the Joinery SQL functions.
 */
public class JoineryPlugin implements io.trino.spi.Plugin {
    /**
     * Lists the classes whose annotated methods this plugin contributes as
     * SQL functions.
     *
     * @return an immutable set containing {@link FarmFingerprintFunction}
     */
    @Override
    public Set<Class<?>> getFunctions() {
        return ImmutableSet.<Class<?>>of(FarmFingerprintFunction.class);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ai.faraday.joinery.JoineryPlugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package ai.faraday.joinery;

import static java.nio.charset.StandardCharsets.UTF_8;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

/**
 * Unit tests for {@link FarmFingerprintFunction}.
 */
public class FarmFingerprintFunctionTest {
    /** A {@code null} argument (SQL NULL) must yield a {@code null} result. */
    @Test
    public void handlesNull() {
        // JUnit's assertEquals takes (expected, actual); keeping that order
        // makes failure messages report the right values.
        assertEquals(null, FarmFingerprintFunction.farmFingerprint(null));
    }

    /** {@code "Hello"} must hash to the known fingerprint value. */
    @Test
    public void calculatesExpectedHash() {
        Slice hello = Slices.copiedBuffer("Hello", UTF_8);
        Long expected = -3042045079152025465L;
        assertEquals(expected, FarmFingerprintFunction.farmFingerprint(hello));
    }
}
4 changes: 3 additions & 1 deletion tests/sql/functions/simple/farm_fingerprint.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
-- pending: snowflake FARM_FINGERPRINT only exists on BigQuery
-- pending: sqlite3 FARM_FINGERPRINT only exists on BigQuery
-- pending: trino FARM_FINGERPRINT only exists on BigQuery
--
-- This works on Trino if you load the UDF as described in `./java/trino-plugin/README.md`.

CREATE OR REPLACE TABLE __result1 AS
SELECT
FARM_FINGERPRINT('foo') AS str_farm,
Expand Down

0 comments on commit 0645dca

Please sign in to comment.