Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partially supported directives #655

Open
wants to merge 8 commits into
base: relational-directives
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.Expression;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -33,8 +38,11 @@
import io.cdap.wrangler.api.parser.Identifier;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
* This class <code>ChangeColCaseNames</code> converts the case of the columns
Expand Down Expand Up @@ -94,5 +102,29 @@ public Mutation lineage() {
.all(Many.of())
.build();
}

/**
 * Expresses the column-case change as a relational projection.
 *
 * <p>Looks up a String expression factory from the context; when one is available, every input
 * column is re-selected under the name produced by {@code generateColumnCaseMap}.
 *
 * @param relationalTranformContext context used to obtain the expression factory and the input columns
 * @param relation relation the projection is applied to
 * @return the projected relation, or an {@link InvalidRelation} when no expression factory is found
 */
@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
                          Relation relation) {
  java.util.Optional<ExpressionFactory<String>> factoryLookup =
    SqlExpressionGenerator.getExpressionFactory(relationalTranformContext);
  if (factoryLookup.isPresent()) {
    List<String> inputColumns = SqlExpressionGenerator.generateListCols(relationalTranformContext);
    Map<String, Expression> projection = generateColumnCaseMap(inputColumns, factoryLookup.get());
    return relation.select(projection);
  }
  return new InvalidRelation("Cannot find an Expression Factory");
}

/**
 * Maps each case-converted column name to an expression compiled from the original name.
 *
 * <p>Names are lower-cased when {@code toLower} is set and upper-cased otherwise. Insertion order
 * follows the order of {@code columns}.
 *
 * @param columns original column names
 * @param factory factory used to compile each original name into an expression
 * @return ordered map from converted name to the expression selecting the original column
 */
private Map<String, Expression> generateColumnCaseMap(List<String> columns, ExpressionFactory<String> factory) {
  Map<String, Expression> renamed = new LinkedHashMap<>();
  for (String original : columns) {
    String converted = toLower ? original.toLowerCase() : original.toUpperCase();
    renamed.put(converted, factory.compile(original));
  }
  return renamed;
}

}

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.Expression;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -30,8 +35,12 @@
import io.cdap.wrangler.api.lineage.Many;
import io.cdap.wrangler.api.lineage.Mutation;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
* A directive for cleansing column names.
Expand Down Expand Up @@ -93,4 +102,30 @@ public Mutation lineage() {
.all(Many.of())
.build();
}

@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
Relation relation) {
java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator
.getExpressionFactory(relationalTranformContext);
if (!expressionFactory.isPresent()) {
return new InvalidRelation("Cannot find an Expression Factory");
}
List<String> columnNames = SqlExpressionGenerator.generateListCols(relationalTranformContext);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this method gets the list of input columns from the transform context, which is not really related to "generating a SQL expression". I would either suggest keeping this in a different util class or you can just rename the method to getColumnList

Map<String, Expression> colmap = generateCleanseColumnMap(columnNames, expressionFactory.get());
return relation.select(colmap);
}

public static Map<String, Expression> generateCleanseColumnMap(Collection columns,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make private

ExpressionFactory<String> factory) {
Map<String, Expression> columnExpMap = new LinkedHashMap<>();
columns.forEach((colName)-> columnExpMap.put(String
.format(colName
.toString()
.toLowerCase()
.replaceAll("[^a-zA-Z0-9_]", "_")), factory
.compile(colName.toString())));
return columnExpMap;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.Expression;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -32,9 +37,15 @@
import io.cdap.wrangler.api.parser.ColumnNameList;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
* A directive for setting the columns obtained from wrangling.
Expand Down Expand Up @@ -90,5 +101,27 @@ public Mutation lineage() {
.generate(Many.of(columns))
.build();
}

@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
Relation relation) {
Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator
.getExpressionFactory(relationalTranformContext);
if (!expressionFactory.isPresent()) {
return new InvalidRelation("Cannot find an Expression Factory");
}
List<String> columnNames = SqlExpressionGenerator.generateListCols(relationalTranformContext);
Map<String, Expression> columnExpMap = new LinkedHashMap<>();

IntStream.range(0, Math.min(columnNames.size(), columns.size()))
.forEach(i -> columnExpMap.put(columns.get(i), expressionFactory.get().compile(columnNames.get(i))));

if (columnNames.size() > columns.size()) {
IntStream.range(columns.size(), columnNames.size())
.forEach(i -> columnExpMap.put(columnNames.get(i), expressionFactory.get().compile(columnNames.get(i))));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can move to a function with a simple for loop

return relation.select(columnExpMap);
}

}

18 changes: 18 additions & 0 deletions wrangler-core/src/main/java/io/cdap/directives/column/SetType.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -36,6 +40,7 @@
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.ColumnConverter;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.math.RoundingMode;
import java.util.List;
Expand Down Expand Up @@ -108,4 +113,17 @@ public Mutation lineage() {
.relation(col, col)
.build();
}

@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
Relation relation) {
java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator
.getExpressionFactory(relationalTranformContext);
if (!expressionFactory.isPresent()) {
return new InvalidRelation("Cannot find an Expression Factory");
}
String expression = SqlExpressionGenerator.getColumnTypeExp(type, col, scale);
shrverma marked this conversation as resolved.
Show resolved Hide resolved
return relation.setColumn(col, expressionFactory.get().compile(expression));
}

}
19 changes: 19 additions & 0 deletions wrangler-core/src/main/java/io/cdap/directives/date/DiffDate.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -32,11 +36,13 @@
import io.cdap.wrangler.api.parser.ColumnName;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.Optional;

/**
* A directive for taking difference in Dates.
Expand Down Expand Up @@ -118,4 +124,17 @@ public Mutation lineage() {
.relation(column2, column2)
.build();
}

/**
 * Expresses the date difference as a relational column assignment.
 *
 * <p>Writes {@code datediff(millisecond, timestamp(column2), timestamp(column1))} into the
 * destination column.
 *
 * @param relationalTranformContext context used to obtain the expression factory
 * @param relation relation the column assignment is applied to
 * @return the updated relation, or an {@link InvalidRelation} when no expression factory is found
 */
@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
                          Relation relation) {
  Optional<ExpressionFactory<String>> factoryLookup =
    SqlExpressionGenerator.getExpressionFactory(relationalTranformContext);
  if (factoryLookup.isPresent()) {
    String diffSql = String.format("datediff(millisecond, timestamp(%s), timestamp(%s))", column2, column1);
    return relation.setColumn(destCol, factoryLookup.get().compile(diffSql));
  }
  return new InvalidRelation("Cannot find an Expression Factory");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -32,6 +36,7 @@
import io.cdap.wrangler.api.parser.Text;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.time.LocalDate;
import java.time.ZoneId;
Expand All @@ -40,6 +45,7 @@
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

/**
* A directive for managing date formats.
Expand Down Expand Up @@ -114,4 +120,17 @@ public Mutation lineage() {
.relation(column, column)
.build();
}

/**
 * Expresses the date formatting as a relational column assignment.
 *
 * <p>Overwrites the column with {@code date_format(timestamp(column), 'format')}.
 *
 * @param relationalTranformContext context used to obtain the expression factory
 * @param relation relation the column assignment is applied to
 * @return the updated relation, or an {@link InvalidRelation} when no expression factory is found
 */
@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
                          Relation relation) {
  Optional<ExpressionFactory<String>> factoryLookup =
    SqlExpressionGenerator.getExpressionFactory(relationalTranformContext);
  if (factoryLookup.isPresent()) {
    // NOTE(review): format is spliced into a single-quoted literal without escaping, so a pattern
    // that itself contains a single quote would presumably break the expression -- confirm.
    String formatSql = String.format("date_format(timestamp(%s), '%s')", column, format);
    return relation.setColumn(column, factoryLookup.get().compile(formatSql));
  }
  return new InvalidRelation("Cannot find an Expression Factory");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -33,6 +37,7 @@
import io.cdap.wrangler.api.parser.Text;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
Expand Down Expand Up @@ -119,4 +124,17 @@ public Mutation lineage() {
.relation(column, column)
.build();
}

/**
 * Expresses the charset decoding as a relational column assignment.
 *
 * <p>Overwrites the column with {@code decode(column, 'charset')}.
 *
 * @param relationalTranformContext context used to obtain the expression factory
 * @param relation relation the column assignment is applied to
 * @return the updated relation, or an {@link InvalidRelation} when no expression factory is found
 */
@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
                          Relation relation) {
  java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator
    .getExpressionFactory(relationalTranformContext);
  if (!expressionFactory.isPresent()) {
    return new InvalidRelation("Cannot find an Expression Factory");
  }
  // The template must close exactly the one parenthesis it opens; a trailing extra ')' makes the
  // generated expression syntactically invalid.
  String decodeSql = String.format("decode(%s, '%s')", column, charset);
  return relation.setColumn(column, expressionFactory.get().compile(decodeSql));
}

}
21 changes: 21 additions & 0 deletions wrangler-core/src/main/java/io/cdap/directives/row/Fail.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.InvalidRelation;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.RelationalTranformContext;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
Expand All @@ -37,6 +41,7 @@
import io.cdap.wrangler.expression.ELContext;
import io.cdap.wrangler.expression.ELException;
import io.cdap.wrangler.expression.ELResult;
import io.cdap.wrangler.utils.SqlExpressionGenerator;

import java.util.List;

Expand Down Expand Up @@ -116,4 +121,20 @@ public List<EntityCountMetric> getCountMetrics() {
EntityCountMetric jexlCategoryMetric = getJexlCategoryMetric(el.getScriptParsedText());
return (jexlCategoryMetric == null) ? null : ImmutableList.of(jexlCategoryMetric);
}

@Override
public Relation transform(RelationalTranformContext relationalTranformContext,
Relation relation) {
java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator
.getExpressionFactory(relationalTranformContext);
if (!expressionFactory.isPresent()) {
return new InvalidRelation("Cannot find an Expression Factory");
}

return relation.setColumn("tempColumn", expressionFactory.get().compile(
String.format("if(%s, raise_error(\"Condition '%s' evaluated to true. " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can move this to a variable

"Terminating processing.\"), %s)", el.getScriptParsedText(),
el.getScriptParsedText(), el.getScriptParsedText())));
}

}
Loading