Skip to content

Commit

Permalink
TINKERPOP-3028 Translators using grammar
Browse files Browse the repository at this point in the history
Implemented Java, Python, a normalizer for language and an anonymizer implementation.
  • Loading branch information
spmallette committed Dec 28, 2023
1 parent 7f35052 commit ba51c5c
Show file tree
Hide file tree
Showing 29 changed files with 5,286 additions and 1,196 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima
=== TinkerPop 4.0.0 (NOT OFFICIALLY RELEASED YET)
* Added support for deserialization of `Set` for `gremlin-javascript`.
* Made `SeedStrategy` construction consistent with the builder pattern and removed the public constructor.
* Added grammar-based `Translator`.
* Added integer overflow checks for `sum()`.
* Gremlin Server only supports instantiation of `authentication.authenticationHandler` with three-arg constructor.
* Removed previously deprecated two-arg constructors for `authentication.authenticationHandler` implementations.
Expand Down
2 changes: 1 addition & 1 deletion docs/src/reference/the-traversal.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -5815,7 +5815,7 @@ With `SeedStrategy` in place, however, the same order is applied each time:
[gremlin-groovy,modern]
----
seedStrategy = new SeedStrategy(999998L)
seedStrategy = SeedStrategy.build().seed(999998L).create()
g.withStrategies(seedStrategy).V().values('name').fold().order(local).by(shuffle)
g.withStrategies(seedStrategy).V().values('name').fold().order(local).by(shuffle)
g.withStrategies(seedStrategy).V().values('name').fold().order(local).by(shuffle)
Expand Down
5 changes: 5 additions & 0 deletions docs/src/upgrade/release-4.x.x.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ these were deserialized into arrays.
Integer overflows caused by addition and multiplication operations will now throw an exception instead of silently
producing an incorrect result.
==== SeedStrategy Construction
The `SeedStrategy` public constructor has been removed for Java and has been replaced by the builder pattern common
to all strategies. This change was made to ensure that the `SeedStrategy` could be constructed in a consistent manner.
=== Upgrading for Providers
==== Graph System Providers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,10 @@ protected void notImplemented(final ParseTree ctx) {
* {@inheritDoc}
*/
@Override public T visitTraversalStrategyArgs_ProductiveByStrategy(final GremlinParser.TraversalStrategyArgs_ProductiveByStrategyContext ctx) { return null; }
/**
 * {@inheritDoc}
 *
 * <p>The default behavior here is a no-op that yields {@code null}.
 */
@Override
public T visitTraversalStrategyArgs_SeedStrategy(final GremlinParser.TraversalStrategyArgs_SeedStrategyContext ctx) {
    return null;
}
/**
* {@inheritDoc}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,17 @@ else if (strategyName.equals(EdgeLabelVerificationStrategy.class.getSimpleName()
else if (strategyName.equals(SubgraphStrategy.class.getSimpleName()))
return getSubgraphStrategy(ctx.traversalStrategyArgs_SubgraphStrategy());
else if (strategyName.equals(SeedStrategy.class.getSimpleName()))
return new SeedStrategy(antlr.argumentVisitor.parseNumber(ctx.integerArgument()).longValue());
return getSeedStrategy(ctx.traversalStrategyArgs_SeedStrategy());
else if (strategyName.equals(ProductiveByStrategy.class.getSimpleName()))
return getProductiveByStrategy(ctx.traversalStrategyArgs_ProductiveByStrategy());
}
throw new IllegalStateException("Unexpected TraversalStrategy specification - " + ctx.getText());
}

/**
 * Builds a {@link SeedStrategy} from its parsed argument context, using the integer argument as the seed.
 */
private SeedStrategy getSeedStrategy(final GremlinParser.TraversalStrategyArgs_SeedStrategyContext ctx) {
    // the parsed number is narrowed to a long before being handed to the builder
    final long seed = antlr.argumentVisitor.parseNumber(ctx.integerArgument()).longValue();
    return SeedStrategy.build().seed(seed).create();
}

private EdgeLabelVerificationStrategy getEdgeLabelVerificationStrategy(final List<GremlinParser.TraversalStrategyArgs_EdgeLabelVerificationStrategyContext> ctxs) {
if (null == ctxs || ctxs.isEmpty())
return EdgeLabelVerificationStrategy.build().create();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.language.translator;

import org.antlr.v4.runtime.ParserRuleContext;
import org.apache.tinkerpop.gremlin.language.grammar.GremlinParser;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * A translator that anonymizes Gremlin so that arguments that might contain sensitive information are removed.
 * <p>
 * Every literal is replaced by a placeholder made of the lower-cased simple name of a representative Java type
 * followed by a counter (for example {@code string0}, {@code string1}). Within one visitor instance the same literal
 * text of the same type always maps to the same placeholder.
 */
public class AnonymizedTranslatorVisitor extends TranslateVisitor {

    /**
     * Placeholder cache keyed by the simple class name of the literal type. Each inner map goes from the raw literal
     * text to the placeholder that was assigned to it.
     */
    private final Map<String, Map<Object, String>> simpleNameToObjectCache = new HashMap<>();

    /**
     * Creates a visitor that uses {@code "g"} as the graph traversal source name.
     */
    public AnonymizedTranslatorVisitor() {
        this("g");
    }

    /**
     * Creates a visitor for the given graph traversal source name.
     *
     * @param graphTraversalSourceName the name handed to the parent {@link TranslateVisitor}
     */
    public AnonymizedTranslatorVisitor(final String graphTraversalSourceName) {
        super(graphTraversalSourceName);
    }

    /**
     * Anonymizes the given context by replacing the text with a lower case version of the class name and a number
     * that is incremented for each unique value. The placeholder is appended to the inherited {@code sb} buffer.
     *
     * @param ctx the context to anonymize
     * @param clazz the class of the context
     * @return null
     */
    protected Void anonymize(final ParserRuleContext ctx, final Class<?> clazz) {
        final String text = ctx.getText();
        final String type = clazz.getSimpleName();

        // lazily create the per-type cache the first time a type is encountered
        Map<Object, String> objectToAnonymizedString = simpleNameToObjectCache.get(type);
        if (objectToAnonymizedString == null) {
            objectToAnonymizedString = new HashMap<>();
            simpleNameToObjectCache.put(type, objectToAnonymizedString);
        }

        String anonymizedValue = objectToAnonymizedString.get(text);
        if (anonymizedValue == null) {
            // the current cache size is the next free counter for this type. Locale.ROOT keeps the label stable
            // regardless of the JVM default locale (e.g. "Integer" must not become "ınteger" under tr-TR).
            anonymizedValue = type.toLowerCase(Locale.ROOT) + objectToAnonymizedString.size();
            objectToAnonymizedString.put(text, anonymizedValue);
        }

        sb.append(anonymizedValue);
        return null;
    }

    /**
     * Anonymizes a collection literal as a {@link List} placeholder.
     */
    @Override
    public Void visitGenericLiteralCollection(final GremlinParser.GenericLiteralCollectionContext ctx) {
        return anonymize(ctx, List.class);
    }

    /**
     * Anonymizes a map literal as a {@link Map} placeholder.
     */
    @Override
    public Void visitGenericLiteralMap(final GremlinParser.GenericLiteralMapContext ctx) {
        return anonymize(ctx, Map.class);
    }

    /**
     * Anonymizes a nullable map argument as a {@link Map} placeholder.
     */
    @Override
    public Void visitGenericLiteralMapNullableArgument(final GremlinParser.GenericLiteralMapNullableArgumentContext ctx) {
        return anonymize(ctx, Map.class);
    }

    /**
     * Anonymizes a string literal as a {@link String} placeholder.
     */
    @Override
    public Void visitStringLiteral(final GremlinParser.StringLiteralContext ctx) {
        return anonymize(ctx, String.class);
    }

    /**
     * Anonymizes a nullable string literal as a {@link String} placeholder.
     */
    @Override
    public Void visitStringNullableLiteral(final GremlinParser.StringNullableLiteralContext ctx) {
        return anonymize(ctx, String.class);
    }

    /**
     * Anonymizes an integer literal, choosing the placeholder type from the literal's trailing type suffix
     * ({@code b}, {@code s}, {@code i}, {@code l}, {@code n}, case-insensitive) and falling back to {@link Number}
     * when the last character is none of those.
     */
    @Override
    public Void visitIntegerLiteral(final GremlinParser.IntegerLiteralContext ctx) {
        // Locale.ROOT so that an upper-case 'I' suffix is recognised under every default locale
        final String integerLiteral = ctx.getText().toLowerCase(Locale.ROOT);

        // check suffix
        final char lastCharacter = integerLiteral.charAt(integerLiteral.length() - 1);
        switch (lastCharacter) {
            case 'b':
                return anonymize(ctx, Byte.class);
            case 's':
                return anonymize(ctx, Short.class);
            case 'i':
                return anonymize(ctx, Integer.class);
            case 'l':
                return anonymize(ctx, Long.class);
            case 'n':
                return anonymize(ctx, BigInteger.class);
            default:
                return anonymize(ctx, Number.class);
        }
    }

    /**
     * Anonymizes a floating point literal, choosing the placeholder type from the literal's trailing type suffix
     * ({@code f}, {@code d}, {@code m}, case-insensitive) and falling back to {@link Number} when the last character
     * is none of those.
     */
    @Override
    public Void visitFloatLiteral(final GremlinParser.FloatLiteralContext ctx) {
        final String floatLiteral = ctx.getText().toLowerCase(Locale.ROOT);

        // check suffix
        final char lastCharacter = floatLiteral.charAt(floatLiteral.length() - 1);
        switch (lastCharacter) {
            case 'f':
                return anonymize(ctx, Float.class);
            case 'd':
                return anonymize(ctx, Double.class);
            case 'm':
                return anonymize(ctx, BigDecimal.class);
            default:
                return anonymize(ctx, Number.class);
        }
    }

    /**
     * Anonymizes a boolean literal as a {@link Boolean} placeholder.
     */
    @Override
    public Void visitBooleanLiteral(final GremlinParser.BooleanLiteralContext ctx) {
        return anonymize(ctx, Boolean.class);
    }

    /**
     * Anonymizes a date literal as a {@link Date} placeholder.
     */
    @Override
    public Void visitDateLiteral(final GremlinParser.DateLiteralContext ctx) {
        return anonymize(ctx, Date.class);
    }

    /**
     * Anonymizes a null literal as an {@link Object} placeholder.
     */
    @Override
    public Void visitNullLiteral(final GremlinParser.NullLiteralContext ctx) {
        return anonymize(ctx, Object.class);
    }

    /**
     * Anonymizes a NaN literal as a {@link Number} placeholder.
     */
    @Override
    public Void visitNanLiteral(final GremlinParser.NanLiteralContext ctx) {
        return anonymize(ctx, Number.class);
    }

    /**
     * Anonymizes an infinity literal as a {@link Number} placeholder.
     */
    @Override
    public Void visitInfLiteral(final GremlinParser.InfLiteralContext ctx) {
        return anonymize(ctx, Number.class);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.language.translator;

import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.apache.tinkerpop.gremlin.language.grammar.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Translates a String representation of a Gremlin traversal into one of various source code representations of that
 * traversal using the ANTLR grammar.
 */
public class GremlinTranslator {
    private static final Logger log = LoggerFactory.getLogger(GremlinTranslator.class);
    private static final GremlinErrorListener errorListener = new GremlinErrorListener();

    /**
     * Convert a Gremlin string into a Java source code representation of that traversal, using {@code "g"} as the
     * graph traversal source name.
     *
     * @param query the Gremlin to translate
     * @return the translation result
     */
    public static Translation translate(final String query) {
        return translate(query, "g");
    }

    /**
     * Convert a Gremlin string into a Java source code representation of that traversal.
     *
     * @param query the Gremlin to translate
     * @param graphTraversalSourceName the name of the graph traversal source variable
     * @return the translation result
     */
    public static Translation translate(final String query, final String graphTraversalSourceName) {
        return translate(query, graphTraversalSourceName, Translator.JAVA);
    }

    /**
     * Convert a Gremlin string into the specified source code representation of that traversal.
     *
     * @param query the Gremlin to translate
     * @param graphTraversalSourceName the name of the graph traversal source variable
     * @param translator the translator that supplies the visitor to use
     * @return the translation result
     */
    public static Translation translate(final String query, final String graphTraversalSourceName, final Translator translator) {
        return translate(query, translator.getTranslateVisitor(graphTraversalSourceName));
    }

    /**
     * Convert a Gremlin string into the specified source code representation of that traversal, using {@code "g"}
     * as the graph traversal source name.
     *
     * @param query the Gremlin to translate
     * @param translator the translator that supplies the visitor to use
     * @return the translation result
     */
    public static Translation translate(final String query, final Translator translator) {
        return translate(query, translator.getTranslateVisitor("g"));
    }

    /**
     * Convert a Gremlin string into a source code representation of that traversal using a custom visitor.
     *
     * @param query the Gremlin to translate
     * @param visitor the visitor that walks the parse tree and accumulates the translated output
     * @return the translation result built from the query, the visitor's translated text and its parameters
     * @throws GremlinParserException if the query cannot be parsed in either prediction mode, or if visiting the
     * parse tree fails with a {@link ClassCastException}
     */
    public static Translation translate(final String query, final TranslateVisitor visitor) {
        final CharStream in = CharStreams.fromString(query);
        final GremlinLexer lexer = new GremlinLexer(in);
        lexer.removeErrorListeners();
        lexer.addErrorListener(errorListener);

        final CommonTokenStream tokens = new CommonTokenStream(lexer);

        // Setup error handler on parser
        final GremlinParser parser = new GremlinParser(tokens);
        // SLL prediction mode is faster than the LL prediction mode when parsing the grammar,
        // but it does not cover parsing all types of input. We use the SLL by default, and fallback
        // to LL mode if fails to parse the query.
        parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
        parser.removeErrorListeners();
        parser.addErrorListener(errorListener);

        GremlinParser.QueryListContext queryContext;
        try {
            queryContext = parser.queryList();
        } catch (Exception sllFailure) {
            // Retry parsing the query again with using LL prediction mode. LL parsing mode is more powerful
            // so retrying the parsing would help parsing the rare edge cases.
            try {
                tokens.seek(0); // rewind input stream
                lexer.reset();
                parser.reset();
                parser.getInterpreter().setPredictionMode(PredictionMode.LL);
                queryContext = parser.queryList();
                // only report LL success once the retry has actually succeeded
                log.debug("Query parsed with using LL prediction mode: {}", query);
            } catch (Exception e) {
                // pass the exception as the throwable argument so the logger records the stack trace; only its
                // message is carried on the exception thrown to the caller
                log.debug("Query parsing failed in retry with exception", e);
                throw new GremlinParserException("Failed to interpret Gremlin query: " + e.getMessage());
            }
        }

        try {
            visitor.visit(queryContext);
        } catch (ClassCastException ex) {
            // Special case that can be interpreted as a (semantic) parse error.
            // Do not include ex as the cause - org.apache.tinkerpop.gremlin.groovy.engine.GremlinExecutor.eval()
            // in Tinkerpop v 3.3.0, strips root causes off their outer exception objects.
            // We just log ex here for debugging purposes.
            log.debug("Converting a java.lang.ClassCastException to GremlinParserException," +
                    " assuming that it indicates a semantic parse error.", ex);
            throw new GremlinParserException("Failed to interpret Gremlin query: " + ex.getMessage());
        }

        return new Translation(query, visitor.getTranslated(), visitor.getParameters());
    }
}
Loading

0 comments on commit ba51c5c

Please sign in to comment.