Skip to content

Commit

Permalink
Fixed quoting
Browse files Browse the repository at this point in the history
  • Loading branch information
Leonard Wolters committed May 28, 2024
1 parent 32eca06 commit 1c5925a
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,31 @@ trait SplitMergeFunctionTokenizer {
/**
 * Renders a [[SplitMergeFunction]] as the text of a SQL function call.
 *
 * NOTE(review): this text is a rendered commit diff; removed and added lines are interleaved
 * without +/- markers, so several statements below appear twice (old form, then new form).
 *
 * @param col the split/merge function to render
 * @param ctx implicit tokenize context; not referenced by name in the visible lines
 *            (presumably picked up implicitly by tokenizeColumn -- confirm)
 * @return the rendered call, built with string interpolation in each case below
 */
def tokenizeSplitMergeFunction(col: SplitMergeFunction[_])(implicit ctx: TokenizeContext): String = col match {
case SplitByChar(sep: StringColMagnet[_], col: StringColMagnet[_]) =>
// Optimization: choose the emitted function and separator form from the separator's length and character code.
// Older form: inspects the tokenized separator; length 3 presumably means one character plus two quotes -- confirm.
val separator = tokenizeColumn(sep.column)
if (separator.length == 3) {
val s = separator.charAt(1).toInt
// Newer form: reads the raw constant value instead of the tokenized text.
// NOTE(review): only Const is matched; any other column type has no case here and would raise a
// scala.MatchError at runtime. The asInstanceOf[String] cast is also unchecked.
val separator = sep.column match {
case c: Const[_] => c.const.asInstanceOf[String]
}

if (separator.length == 1) {
val s = separator.charAt(0).toInt

// https://en.wikipedia.org/wiki/List_of_Unicode_characters
if (s >= 32 && s <= 126) {
s"splitByChar($separator,${tokenizeColumn(col.column)})"
// Codes emitted as char(<code>) rather than through tokenizeColumn:
// 34 == Double Quote ("), 39 == Single Quote ('),
// 92 == Backslash (\), 96 == Grave Accent (` under tilde)
// 47 == Forward Slash (/) is NOT special-cased; it takes the 32..126 branch below.
if (s == 34 || s == 39 || s == 92 || s == 96) {
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
} else if (s >= 32 && s <= 126) {
// Remaining codes 32..126: the separator is rendered by tokenizeColumn
s"splitByChar(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
} else {
// Codes below 32 or above 126: emitted as char(<code>)
s"splitByChar(char($s),${tokenizeColumn(col.column)})"
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
}
} else {
// Separator is not exactly one character: emit splitByString instead of splitByChar
s"splitByString($separator,${tokenizeColumn(col.column)})"
s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
}
case SplitByString(sep: StringColMagnet[_], col: StringColMagnet[_]) =>
s"splitByString(${tokenizeColumn(sep.column)},${tokenizeColumn(col.column)})"
s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
case ArrayStringConcat(col: ArrayColMagnet[_], sep: StringColMagnet[_]) =>
s"arrayStringConcat(${tokenizeColumn(col.column)},${tokenizeColumn(sep.column)})"
s"arrayStringConcat(${tokenizeColumn(col.column)}, ${tokenizeColumn(sep.column)})"
case AlphaTokens(col: StringColMagnet[_]) => s"alphaTokens(${tokenizeColumn(col.column)})"
}
}
Original file line number Diff line number Diff line change
@@ -1,28 +1,39 @@
package com.crobox.clickhouse.dsl.language

import com.crobox.clickhouse.dsl._
import com.crobox.clickhouse.{dsl, DslTestSpec}
import com.crobox.clickhouse.{DslTestSpec, dsl}

/**
 * Checks the SQL text produced for dsl.splitByChar with different separators: control and
 * non-ASCII codes, ordinary printable characters, quote-like characters, and separators
 * longer than one character.
 *
 * NOTE(review): this text is a rendered commit diff; removed and added assertion lines are
 * interleaved without +/- markers (old expectations have no space after the comma).
 */
class SplitMergeFunctionTokenizerTest extends DslTestSpec {
// Code point 31 (hex 1F); the first assertion expects it to be rendered as char(31)
val FIELD_DELIMITER: Char = '\u001F'

it should "splitByChar using special character" in {

toSQL(select(dsl.splitByChar(FIELD_DELIMITER.toString, "abcd"))) should matchSQL(
"SELECT splitByChar(char(31),'abcd')"
"SELECT splitByChar(char(31), 'abcd')"
)
// Boundaries of the quoted-literal range: 32 and 126 stay literals, 31 and 127+ become char(<code>)
toSQL(select(dsl.splitByChar(31.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(31),'abcd')")
toSQL(select(dsl.splitByChar(32.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(' ','abcd')")
toSQL(select(dsl.splitByChar(126.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar('~','abcd')")
toSQL(select(dsl.splitByChar(127.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(127),'abcd')")
toSQL(select(dsl.splitByChar(128.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(128),'abcd')")
toSQL(select(dsl.splitByChar(159.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(159),'abcd')")
toSQL(select(dsl.splitByChar(31.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(31), 'abcd')")
toSQL(select(dsl.splitByChar(32.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(' ', 'abcd')")
toSQL(select(dsl.splitByChar(126.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar('~', 'abcd')")
toSQL(select(dsl.splitByChar(127.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(127), 'abcd')")
toSQL(select(dsl.splitByChar(128.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(128), 'abcd')")
toSQL(select(dsl.splitByChar(159.toChar.toString, "abcd"))) should matchSQL("SELECT splitByChar(char(159), 'abcd')")

// Ordinary printable single characters are expected as quoted literals
toSQL(select(dsl.splitByChar(",", "abcd"))) should matchSQL("SELECT splitByChar(',','abcd')")
toSQL(select(dsl.splitByChar("a", "abcd"))) should matchSQL("SELECT splitByChar('a','abcd')")
toSQL(select(dsl.splitByChar("ab", "abcd"))) should matchSQL("SELECT splitByString('ab','abcd')")
toSQL(select(dsl.splitByChar("L", "abcd"))) should matchSQL("SELECT splitByChar('L','abcd')")
toSQL(select(dsl.splitByChar("$", "abcd"))) should matchSQL("SELECT splitByChar('$','abcd')")
toSQL(select(dsl.splitByChar("-", "abcd"))) should matchSQL("SELECT splitByChar('-','abcd')")
toSQL(select(dsl.splitByChar(",", "abcd"))) should matchSQL("SELECT splitByChar(',', 'abcd')")
toSQL(select(dsl.splitByChar("a", "abcd"))) should matchSQL("SELECT splitByChar('a', 'abcd')")
toSQL(select(dsl.splitByChar("L", "abcd"))) should matchSQL("SELECT splitByChar('L', 'abcd')")
toSQL(select(dsl.splitByChar("$", "abcd"))) should matchSQL("SELECT splitByChar('$', 'abcd')")
toSQL(select(dsl.splitByChar("-", "abcd"))) should matchSQL("SELECT splitByChar('-', 'abcd')")
toSQL(select(dsl.splitByChar("!", "abcd"))) should matchSQL("SELECT splitByChar('!', 'abcd')")

// Quote-like characters (double quote, single quote, backtick, backslash) are expected as
// char(<code>) instead of a quoted literal; see ClickhouseStatement.UnquotedIdentifier.
// The forward slash is expected to stay a quoted literal.
toSQL(select(dsl.splitByChar("\"", "abcd"))) should matchSQL("SELECT splitByChar(char(34), 'abcd')")
toSQL(select(dsl.splitByChar("'", "abcd"))) should matchSQL("SELECT splitByChar(char(39), 'abcd')")
toSQL(select(dsl.splitByChar("`", "abcd"))) should matchSQL("SELECT splitByChar(char(96), 'abcd')")
toSQL(select(dsl.splitByChar("\\", "abcd"))) should matchSQL("SELECT splitByChar(char(92), 'abcd')")
toSQL(select(dsl.splitByChar("/", "abcd"))) should matchSQL("SELECT splitByChar('/', 'abcd')")

// Separators longer than one character are expected to be rendered with splitByString,
// with an embedded single quote escaped by a backslash
toSQL(select(dsl.splitByChar("ab", "abcd"))) should matchSQL("SELECT splitByString('ab', 'abcd')")
toSQL(select(dsl.splitByChar("a'", "abcd"))) should matchSQL("SELECT splitByString('a\\'', 'abcd')")
}
}

0 comments on commit 1c5925a

Please sign in to comment.