Update doc

PHILO-HE · May 6, 2024 · e661294 · e661294
1 parent 3c5f101
commit e661294
Show file tree

Hide file tree

Showing 7 changed files with 170 additions and 3 deletions.
diff --git a/velox/docs/functions/spark/array.rst b/velox/docs/functions/spark/array.rst
@@ -21,13 +21,31 @@ Array Functions
 
         SELECT array_contains(array(1, 2, 3), 2); -- true
 
+.. spark:function:: array_distinct(array(E)) -> array(E)
+
+    Remove duplicate values from the input array. ::
+
+        SELECT array_distinct(array(1, 2, 3)); -- [1, 2, 3]
+        SELECT array_distinct(array(1, 2, 1)); -- [1, 2]
+        SELECT array_distinct(array(1, NULL, NULL)); -- [1, NULL]
+
+.. spark:function:: array_except(array(E) x, array(E) y) -> array(E)
+
+    Returns an array of the elements in array ``x`` but not in array ``y``, without duplicates. ::
+
+        SELECT array_except(array(1, 2, 3), array(4, 5, 6)); -- [1, 2, 3]
+        SELECT array_except(array(1, 2, 3), array(1, 2)); -- [3]
+        SELECT array_except(array(1, 2, 2), array(1, 1, 2)); -- []
+        SELECT array_except(array(1, 2, 2), array(1, 3, 4)); -- [2]
+        SELECT array_except(array(1, NULL, NULL), array(1, 1, NULL)); -- []
+
 .. spark:function:: array_intersect(array(E), array(E1)) -> array(E2)
 
     Returns an array of the elements in the intersection of array1 and array2, without duplicates. ::
 
         SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)); -- [1,3]
 
-.. function:: array_max(array(E)) -> E
+.. spark:function:: array_max(array(E)) -> E
 
     Returns maximum non-NULL element of the array. Returns NULL if array is empty or all elements are NULL.
     When E is DOUBLE or REAL, returns NaN if any element is NaN. ::
@@ -38,7 +56,7 @@ Array Functions
         SELECT array_max(array()); -- NULL
         SELECT array_max(array(-0.0001, -0.0002, -0.0003, float('nan'))); -- NaN
 
-.. function:: array_min(array(E)) -> E
+.. spark:function:: array_min(array(E)) -> E
 
     Returns minimum non-NULL element of the array. Returns NULL if array is empty or all elements are NULL.
     When E is DOUBLE or REAL, NaN value is considered greater than any non-NaN value. ::
@@ -51,7 +69,15 @@ Array Functions
         SELECT array_min(array(4.0, float('nan'))); -- 4.0
         SELECT array_min(array(NULL, float('nan'))); -- NaN
 
-.. function:: array_remove(x, element) -> array
+.. spark:function:: array_position(x, element) -> bigint
+
+    Returns the position (1-based) of the first occurrence of the ``element`` in array ``x`` (or 0 if not found). ::
+
+        SELECT array_position(array(1, 2, 3), 2); -- 2
+        SELECT array_position(array(1, 2, 3), 4); -- 0
+        SELECT array_position(array(1, 2, 3, 2), 2); -- 2
+
+.. spark:function:: array_remove(x, element) -> array
 
     Remove all elements that equal ``element`` from array ``x``. Returns NULL as result if ``element`` is NULL.
     If array ``x`` is empty array, returns empty array. If all elements in array ``x`` are NULL but ``element`` is not NULL,

diff --git a/velox/docs/functions/spark/binary.rst b/velox/docs/functions/spark/binary.rst
@@ -2,6 +2,10 @@
 Binary Functions
 ================
 
+.. spark:function:: crc32(binary) -> bigint
+
+    Computes the crc32 checksum of ``binary``.
+
 .. spark:function:: hash(x, ...) -> integer
 
     Computes the hash of one or more input values using seed value of 42. For

diff --git a/velox/docs/functions/spark/map.rst b/velox/docs/functions/spark/map.rst
@@ -14,6 +14,12 @@ Map Functions
 
         SELECT map(array(1, 2), array(3, 4)); -- {[1, 2] -> [3, 4]}
 
+.. spark:function:: map_entries(map(K,V)) -> array(row(K,V))
+
+    Returns an array of all entries in the given map. ::
+
+        SELECT map_entries(map(1, 'x', 2, 'y')); -- [ROW(1, 'x'), ROW(2, 'y')]
+
 .. spark:function:: map_filter(map(K,V), func) -> map(K,V)
 
     Filters entries in a map using the function. ::
@@ -27,6 +33,14 @@ Map Functions
 
         SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')); -- {1.0 -> 2, 3.0 -> 4}
 
+.. spark:function:: map_keys(x(K,V)) -> array(K)
+
+    Returns all the keys in the map ``x``.
+
+.. spark:function:: map_values(x(K,V)) -> array(V)
+
+    Returns all the values in the map ``x``.
+
 .. spark:function:: size(map(K,V)) -> bigint
    :noindex:
 

diff --git a/velox/docs/functions/spark/math.rst b/velox/docs/functions/spark/math.rst
@@ -14,10 +14,18 @@ Mathematical Functions
 
     Returns inverse hyperbolic cosine of ``x``.
 
+.. spark:function:: asin(x) -> double
+
+    Returns the arc sine of ``x``.
+
 .. spark:function:: asinh(x) -> double
 
     Returns inverse hyperbolic sine of ``x``.
 
+.. spark:function:: atan(x) -> double
+
+    Returns the arc tangent of ``x``.
+
 .. spark:function:: atan2(y, x) -> double
 
     Returns the arc tangent of ``y / x``. For compatibility with Spark, returns 0 for the following corner cases:
@@ -58,6 +66,10 @@ Mathematical Functions
     Returns ``x`` rounded up to the nearest integer.  
     Supported types are: BIGINT and DOUBLE.
 
+.. spark:function:: cos(x) -> double
+
+    Returns the cosine of ``x``.
+
 .. spark:function:: cosh(x) -> double
 
     Returns the hyperbolic cosine of ``x``.
@@ -70,6 +82,10 @@ Mathematical Functions
 
     Returns the cosecant of ``x``.
 
+.. spark:function:: degrees(x) -> double
+
+    Converts angle ``x`` in radians to degrees.
+
 .. spark:function:: divide(x, y) -> double
 
     Returns the results of dividing x by y. Performs floating point division.

diff --git a/velox/docs/functions/spark/regexp.rst b/velox/docs/functions/spark/regexp.rst
@@ -15,6 +15,28 @@ difference of the character classes.
 
 See https://github.com/google/re2/wiki/Syntax for more information.
 
+.. spark:function:: like(string, pattern) -> boolean
+                    like(string, pattern, escape) -> boolean
+
+    Evaluates if the ``string`` matches the ``pattern``. Patterns can contain
+    regular characters as well as wildcards. Wildcard characters can be escaped
+    using the single character specified for the ``escape`` parameter. Only ASCII
+    characters are supported for the ``escape`` parameter. Matching is case sensitive.
+
+    Note: The wildcard '%' represents 0, 1 or multiple characters and the
+    wildcard '_' represents exactly one character.
+
+    Note: Each function instance allows for a maximum of 20 regular expressions to
+    be compiled per thread of execution. Not all patterns require
+    compilation of regular expressions. Patterns 'hello', 'hello%', '_hello__%',
+    '%hello', '%__hello_', '%hello%', where 'hello'
+    contains only regular characters and '_' wildcards, are evaluated without
+    using regular expressions. Only those patterns that require the compilation of
+    regular expressions are counted towards the limit. ::
+
+        SELECT like('abc', '%b%'); -- true
+        SELECT like('a_c', '%#_%', '#'); -- true
+
 .. spark:function:: regexp_extract(string, pattern) -> varchar
 
     Returns the first substring matched by the regular expression ``pattern``
@@ -36,6 +58,21 @@ See https://github.com/google/re2/wiki/Syntax for more information.
 
         SELECT regexp_extract('1a 2b 14m', '(\d+)([a-z]+)', 2); -- 'a'
 
+.. spark:function:: regexp_extract_all(string, pattern) -> array(varchar)
+
+    Returns the substring(s) matched by the regular expression ``pattern``
+    in ``string``::
+
+        SELECT regexp_extract_all('1a 2b 14m', '\d+'); -- ['1', '2', '14']
+
+.. spark:function:: regexp_extract_all(string, pattern, group) -> array(varchar)
+    :noindex:
+
+    Finds all occurrences of the regular expression ``pattern`` in
+    ``string`` and returns the capturing group number ``group``::
+
+        SELECT regexp_extract_all('1a 2b 14m', '(\d+)([a-z]+)', 2); -- ['a', 'b', 'm']
+
 .. spark:function:: rlike(string, pattern) -> boolean
 
     Evaluates the regular expression ``pattern`` and determines if it is

diff --git a/velox/docs/functions/spark/string.rst b/velox/docs/functions/spark/string.rst
@@ -158,6 +158,10 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
         SELECT replace('ABCabc', 'abc', ''); -- ABC
         SELECT replace('ABCabc', 'abc', 'DEF'); -- ABCDEF
 
+.. spark:function:: reverse(string) -> varchar
+
+    Returns input string with characters in reverse order.
+
 .. spark:function:: rpad(string, len, pad) -> string
     
     Returns ``string``, right-padded with ``pad`` to a length of ``len``. 

diff --git a/velox/docs/functions/spark/url.rst b/velox/docs/functions/spark/url.rst
@@ -0,0 +1,66 @@
+=============
+URL Functions
+=============
+
+Introduction
+------------
+
+The URL extraction functions extract components from HTTP URLs (or any valid URIs conforming to `RFC 3986 <https://tools.ietf.org/html/rfc3986.html>`_). The following syntax is supported:
+
+.. code-block:: bash
+
+    [protocol:][//host[:port]][path][?query][#fragment]
+
+
+The extracted components do not contain URI syntax separators such as ``:``, ``?`` and ``#``.
+
+Consider, for example, the following URI:
+
+.. code-block::
+
+    http://www.ics.uci.edu/pub/ietf/uri/?k1=v1#Related
+
+    scheme    = http
+    authority = www.ics.uci.edu
+    path      = /pub/ietf/uri/
+    query     = k1=v1
+    fragment  = Related
+
+
+Invalid URIs
+------------
+
+Well-formed URIs must not contain ASCII whitespace. In `percent-encoded URIs <https://www.rfc-editor.org/rfc/rfc3986#section-2.1>`_, each percent character "%" must be followed by two hexadecimal
+digits. All the URL extract functions return NULL when passed an invalid URI.
+
+.. code-block::
+
+    # Examples of URL functions with invalid URIs.
+
+    # Invalid URI due to whitespace
+    SELECT url_extract_path('foo '); -- NULL (1 row)
+    SELECT url_extract_host('http://www.foo.com '); -- NULL (1 row)
+
+    # Invalid URI due to improper escaping of '%'
+    SELECT url_extract_path('https://www.ucu.edu.uy/agenda/evento/%%UCUrlCompartir%%'); -- NULL (1 row)
+    SELECT url_extract_host('https://www.ucu.edu.uy/agenda/evento/%%UCUrlCompartir%%'); -- NULL (1 row)
+
+Encoding Functions
+------------------
+
+.. spark:function:: url_encode(value) -> varchar
+
+    Escapes ``value`` by encoding it so that it can be safely included in
+    URL query parameter names and values:
+
+    * Alphanumeric characters are not encoded.
+    * The characters ``.``, ``-``, ``*`` and ``_`` are not encoded.
+    * The ASCII space character is encoded as ``+``.
+    * All other characters are converted to UTF-8 and the bytes are encoded
+      as the string ``%XX`` where ``XX`` is the uppercase hexadecimal
+      value of the UTF-8 byte.
+
+.. spark:function:: url_decode(value) -> varchar
+
+    Unescapes the URL encoded ``value``.
+    This function is the inverse of :spark:func:`url_encode`.