diff --git a/velox/docs/functions/spark/array.rst b/velox/docs/functions/spark/array.rst index aa5932e16732..1c4ea9eb9961 100644 --- a/velox/docs/functions/spark/array.rst +++ b/velox/docs/functions/spark/array.rst @@ -21,13 +21,31 @@ Array Functions SELECT array_contains(array(1, 2, 3), 2); -- true +.. spark:function:: array_distinct(array(E)) -> array(E) + + Remove duplicate values from the input array. :: + + SELECT array_distinct(ARRAY [1, 2, 3]); -- [1, 2, 3] + SELECT array_distinct(ARRAY [1, 2, 1]); -- [1, 2] + SELECT array_distinct(ARRAY [1, NULL, NULL]); -- [1, NULL] + +.. spark:function:: array_except(array(E) x, array(E) y) -> array(E) + + Returns an array of the elements in array ``x`` but not in array ``y``, without duplicates. :: + + SELECT array_except(ARRAY [1, 2, 3], ARRAY [4, 5, 6]); -- [1, 2, 3] + SELECT array_except(ARRAY [1, 2, 3], ARRAY [1, 2]); -- [3] + SELECT array_except(ARRAY [1, 2, 2], ARRAY [1, 1, 2]); -- [] + SELECT array_except(ARRAY [1, 2, 2], ARRAY [1, 3, 4]); -- [2] + SELECT array_except(ARRAY [1, NULL, NULL], ARRAY [1, 1, NULL]); -- [] + .. spark:function:: array_intersect(array(E), array(E1)) -> array(E2) Returns an array of the elements in the intersection of array1 and array2, without duplicates. :: SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)); -- [1,3] -.. function:: array_max(array(E)) -> E +.. spark:function:: array_max(array(E)) -> E Returns maximum non-NULL element of the array. Returns NULL if array is empty or all elements are NULL. When E is DOUBLE or REAL, returns NaN if any element is NaN. :: @@ -38,7 +56,7 @@ Array Functions SELECT array_max(array()); -- NULL SELECT array_max(array(-0.0001, -0.0002, -0.0003, float('nan'))); -- NaN -.. function:: array_min(array(E)) -> E +.. spark:function:: array_min(array(E)) -> E Returns minimum non-NULL element of the array. Returns NULL if array is empty or all elements are NULL. When E is DOUBLE or REAL, NaN value is considered greater than any non-NaN value. 
:: @@ -51,7 +69,15 @@ Array Functions SELECT array_min(array(4.0, float('nan')]); -- 4.0 SELECT array_min(array(NULL, float('nan'))); -- NaN -.. function:: array_remove(x, element) -> array +.. spark:function:: array_position(x, element) -> bigint + + Returns the position (1-based) of the first occurrence of the ``element`` in array ``x`` (or 0 if not found). :: + + SELECT array_position(array(1, 2, 3), 2); -- 2 + SELECT array_position(array(1, 2, 3), 4); -- 0 + SELECT array_position(array(1, 2, 3, 2), 2); -- 2 + +.. spark:function:: array_remove(x, element) -> array Remove all elements that equal ``element`` from array ``x``. Returns NULL as result if ``element`` is NULL. If array ``x`` is empty array, returns empty array. If all elements in array ``x`` are NULL but ``element`` is not NULL, diff --git a/velox/docs/functions/spark/binary.rst b/velox/docs/functions/spark/binary.rst index 8a8ef64039af..4f76b29c1148 100644 --- a/velox/docs/functions/spark/binary.rst +++ b/velox/docs/functions/spark/binary.rst @@ -2,6 +2,10 @@ Binary Functions ================ +.. spark:function:: crc32(binary) -> bigint + + Computes the crc32 checksum of ``binary``. + .. spark:function:: hash(x, ...) -> integer Computes the hash of one or more input values using seed value of 42. For diff --git a/velox/docs/functions/spark/map.rst b/velox/docs/functions/spark/map.rst index 1a995eca5c03..02537e05d573 100644 --- a/velox/docs/functions/spark/map.rst +++ b/velox/docs/functions/spark/map.rst @@ -14,6 +14,12 @@ Map Functions SELECT map(array(1, 2), array(3, 4)); -- {[1, 2] -> [3, 4]} +.. spark:function:: map_entries(map(K,V)) -> array(row(K,V)) + + Returns an array of all entries in the given map. :: + + SELECT map_entries(MAP(ARRAY[1, 2], ARRAY['x', 'y'])); -- [ROW(1, 'x'), ROW(2, 'y')] + .. spark:function:: map_filter(map(K,V), func) -> map(K,V) Filters entries in a map using the function. 
:: @@ -27,6 +33,14 @@ Map Functions SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')); -- {1.0 -> 2, 3.0 -> 4} +.. spark:function:: map_keys(x(K,V)) -> array(K) + + Returns all the keys in the map ``x``. + +.. spark:function:: map_values(x(K,V)) -> array(V) + + Returns all the values in the map ``x``. + .. spark:function:: size(map(K,V)) -> bigint :noindex: diff --git a/velox/docs/functions/spark/math.rst b/velox/docs/functions/spark/math.rst index 8536f18f4842..6e7ec9ee5893 100644 --- a/velox/docs/functions/spark/math.rst +++ b/velox/docs/functions/spark/math.rst @@ -14,10 +14,18 @@ Mathematical Functions Returns inverse hyperbolic cosine of ``x``. +.. spark:function:: asin(x) -> double + + Returns the arc sine of ``x``. + .. spark:function:: asinh(x) -> double Returns inverse hyperbolic sine of ``x``. +.. spark:function:: atan(x) -> double + + Returns the arc tangent of ``x``. + .. spark:function:: atan2(y, x) -> double Returns the arc tangent of ``y / x``. For compatibility with Spark, returns 0 for the following corner cases: @@ -58,6 +66,10 @@ Mathematical Functions Returns ``x`` rounded up to the nearest integer. Supported types are: BIGINT and DOUBLE. +.. spark:function:: cos(x) -> double + + Returns the cosine of ``x``. + .. spark:function:: cosh(x) -> double Returns the hyperbolic cosine of ``x``. @@ -70,6 +82,10 @@ Mathematical Functions Returns the cosecant of ``x``. +.. spark:function:: degrees(x) -> double + + Converts angle ``x`` in radians to degrees. + .. spark:function:: divide(x, y) -> double Returns the results of dividing x by y. Performs floating point division. diff --git a/velox/docs/functions/spark/regexp.rst b/velox/docs/functions/spark/regexp.rst index 57e168fc9d74..11ce78bf0134 100644 --- a/velox/docs/functions/spark/regexp.rst +++ b/velox/docs/functions/spark/regexp.rst @@ -15,6 +15,28 @@ difference of the character classes. See https://github.com/google/re2/wiki/Syntax for more information. +.. 
spark:function:: like(string, pattern) -> boolean + like(string, pattern, escape) -> boolean + + Evaluates if the ``string`` matches the ``pattern``. Patterns can contain + regular characters as well as wildcards. Wildcard characters can be escaped + using the single character specified for the ``escape`` parameter. Only ASCII + characters are supported for the ``escape`` parameter. Matching is case sensitive. + + Note: The wildcard '%' represents 0, 1 or multiple characters and the + wildcard '_' represents exactly one character. + + Note: Each function instance allows for a maximum of 20 regular expressions to + be compiled per thread of execution. Not all patterns require + compilation of regular expressions. Patterns 'hello', 'hello%', '_hello__%', + '%hello', '%__hello_', '%hello%', where 'hello' + contains only regular characters and '_' wildcards, are evaluated without + using regular expressions. Only those patterns that require the compilation of + regular expressions are counted towards the limit. :: + + SELECT like('abc', '%b%'); -- true + SELECT like('a_c', '%#_%', '#'); -- true + .. spark:function:: regexp_extract(string, pattern) -> varchar Returns the first substring matched by the regular expression ``pattern`` @@ -36,6 +58,21 @@ See https://github.com/google/re2/wiki/Syntax for more information. SELECT regexp_extract('1a 2b 14m', '(\d+)([a-z]+)', 2); -- 'a' +.. spark:function:: regexp_extract_all(string, pattern) -> array(varchar) + + Returns the substring(s) matched by the regular expression ``pattern`` + in ``string``:: + + SELECT regexp_extract_all('1a 2b 14m', '\d+'); -- [1, 2, 14] + +.. spark:function:: regexp_extract_all(string, pattern, group) -> array(varchar) + :noindex: + + Finds all occurrences of the regular expression ``pattern`` in + ``string`` and returns the capturing group number ``group``:: + + SELECT regexp_extract_all('1a 2b 14m', '(\d+)([a-z]+)', 2); -- ['a', 'b', 'm'] + .. 
spark:function:: rlike(string, pattern) -> boolean Evaluates the regular expression ``pattern`` and determines if it is diff --git a/velox/docs/functions/spark/string.rst b/velox/docs/functions/spark/string.rst index 817eaad34ab2..f661811e288d 100644 --- a/velox/docs/functions/spark/string.rst +++ b/velox/docs/functions/spark/string.rst @@ -158,6 +158,10 @@ Unless specified otherwise, all functions return NULL if at least one of the arg SELECT replace('ABCabc', 'abc', ''); -- ABC SELECT replace('ABCabc', 'abc', 'DEF'); -- ABCDEF +.. spark:function:: reverse(string) -> varchar + + Returns input string with characters in reverse order. + .. spark:function:: rpad(string, len, pad) -> string Returns ``string``, right-padded with ``pad`` to a length of ``len``. diff --git a/velox/docs/functions/spark/url.rst b/velox/docs/functions/spark/url.rst new file mode 100644 index 000000000000..900aee27610a --- /dev/null +++ b/velox/docs/functions/spark/url.rst @@ -0,0 +1,66 @@ +============= +URL Functions +============= + +Introduction +------------ + +The URL extraction functions extract components from HTTP URLs (or any valid URIs conforming to `RFC 3986 <https://www.rfc-editor.org/rfc/rfc3986>`_). The following syntax is supported: + +.. code-block:: bash + + [protocol:][//host[:port]][path][?query][#fragment] + + +The extracted components do not contain URI syntax separators such as ``:``, ``?`` and ``#``. + +Consider for example the below URI: + +.. code-block:: + + http://www.ics.uci.edu/pub/ietf/uri/?k1=v1#Related + + scheme = http + authority = www.ics.uci.edu + path = /pub/ietf/uri/ + query = k1=v1 + fragment = Related + + +Invalid URI's +------------- + +Well-formed URIs should not contain ASCII whitespace. `Percent-encoded URI's <https://www.rfc-editor.org/rfc/rfc3986#section-2.1>`_ should be followed by two hexadecimal +digits after the percent character "%". All the url extract functions will return null when passed an invalid URI. + +.. code-block:: + + # Examples of url functions with Invalid URI's. 
+ + # Invalid URI due to whitespace + SELECT url_extract_path('foo '); -- NULL (1 row) + SELECT url_extract_host('http://www.foo.com '); -- NULL (1 row) + + # Invalid URI due to improper escaping of '%' + SELECT url_extract_path('https://www.ucu.edu.uy/agenda/evento/%%UCUrlCompartir%%'); -- NULL (1 row) + SELECT url_extract_host('https://www.ucu.edu.uy/agenda/evento/%%UCUrlCompartir%%'); -- NULL (1 row) + +Encoding Functions +------------------ + +.. spark:function:: url_encode(value) -> varchar + + Escapes ``value`` by encoding it so that it can be safely included in + URL query parameter names and values: + + * Alphanumeric characters are not encoded. + * The characters ``.``, ``-``, ``*`` and ``_`` are not encoded. + * The ASCII space character is encoded as ``+``. + * All other characters are converted to UTF-8 and the bytes are encoded + as the string ``%XX`` where ``XX`` is the uppercase hexadecimal + value of the UTF-8 byte. + +.. spark:function:: url_decode(value) -> varchar + + Unescapes the URL encoded ``value``. + This function is the inverse of :func:`url_encode`.