diff --git a/doc/reference.md b/doc/reference.md index 032e1774..c7c0b500 100644 --- a/doc/reference.md +++ b/doc/reference.md @@ -533,74 +533,31 @@ BITYPE ::= "void" | "bool" | "i8" | "byte" | "i32" | "char" | "u32" | "int" | EOS ::= '\n' | ';' <> ``` -# Builtin methods used by the language -- $repr - # Features to re-add from old con4m - VM save restore - Arg parsing -- Libraries -- Doc API. -- Checkpointing -- Hot loading -- Finish data types (date, ip, and extra hatrack stuff) +- Change $result back to result (or alias?) +- Libraries (but only the ones Chalk needs) - Final mile: specs - Final mile: params -- Auto-import standard library. +- Doc API. - Callbacks -- Len, etc. +- Make litmods optional when types are declared. # Items for afterward - Objects - Folding - Casting +- Mixed - Varargs functions -- Clean up unused instructions in VM -- Remove the two-words-per-stack-slot thing; it's not needed anymore. - automatic logfd + optional server for log messages - REPL - Keyword arguments - 'maybe' types / nil - Aspects (before / after / around pattern(sig) when x) -- Casting -- (Possibly) re-add := literals - Threading -- Mixed - Pretty printing w/ type annotations - Language server - Checks based on PDG - Full-program info on unused fields & fns. -# Features removed (considered for adding back in) -Also, the language accepts ":=", which has the special syntax of -generating a "=" operator, then tries to automatically determine the -(non-numeric) literal type automatically by running through a -prioritized list of literal parsers (currently intended for the -config-focused literals). - -For instance: - -``` -now := 11 Jan 2024 -``` - -Is equal to: - -``` -now = "11 Jan 2024"'date -``` - -Note that currently, without a literal modifier, "" always assume -'string', so the following does NOT work: - -``` -var now: date -now = "11 Jan 2024" -``` - -This gives a string vs. date type error. `:=` is the thing that says, -"try to be smart!" 
// A date, a time of day, or both, stored as a broken-down `struct tm`
// plus flags recording which components were actually provided.
typedef struct c4m_date_time_t {
    // Calendar and clock fields; the parsers also store the UTC offset
    // (in seconds) in dt.tm_gmtoff.
    struct tm    dt;
    // The digits following the '.' in the seconds field, accumulated as
    // a plain decimal integer (not scaled to a fixed unit).
    uint64_t     fracsec;
    unsigned int have_time     : 1; // Hour and minute were provided.
    unsigned int have_sec      : 1; // A seconds field was provided.
    unsigned int have_frac_sec : 1; // Fractional seconds were provided.
    unsigned int have_month    : 1; // A month was provided.
    unsigned int have_year     : 1; // A year was provided.
    unsigned int have_day      : 1; // A day of month was provided.
    unsigned int have_offset   : 1; // A UTC offset / zone marker was provided.
} c4m_date_time_t;
// I realize some of this is redundant, but it's just easier.
//
// Storage for one IP address plus a port.
typedef struct {
    // Raw bytes, sized to hold a `struct sockaddr_in6`.
    // NOTE(review): presumably also used for IPv4 addresses -- confirm
    // against src/adts/ipaddr.c.
    char     addr[sizeof(struct sockaddr_in6)];
    // Port number. NOTE(review): byte order is not visible from this
    // header -- confirm.
    uint16_t port;
    // NOTE(review): presumably the address family (AF_INET / AF_INET6)
    // -- confirm.
    int32_t  af;
} c4m_ipaddr_t;
c4m_err_invalid_size_lit, + c4m_err_invalid_duration_lit, #ifdef C4M_DEV c4m_err_void_print, #endif diff --git a/include/con4m.h b/include/con4m.h index 2fcedbfc..e873bea9 100644 --- a/include/con4m.h +++ b/include/con4m.h @@ -1,5 +1,4 @@ #pragma once - #include "con4m/config.h" // Useful options (mainly for dev) are commented out here. // The logic below (and into the relevent header files) sets up defaults. @@ -115,6 +114,9 @@ #include "adts/dict.h" #include "adts/set.h" +#include "adts/ipaddr.h" +#include "adts/datetime.h" +#include "adts/duration.h" #include "core/ffi.h" #include "util/watch.h" diff --git a/include/con4m/base.h b/include/con4m/base.h index 3b8ac3ba..69cecb4c 100644 --- a/include/con4m/base.h +++ b/include/con4m/base.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/include/con4m/datatypes.h b/include/con4m/datatypes.h index 1efa11b5..e7422dc3 100644 --- a/include/con4m/datatypes.h +++ b/include/con4m/datatypes.h @@ -39,6 +39,9 @@ typedef struct hatrack_set_st c4m_set_t; #include "compiler/dt_module.h" #include "compiler/dt_compile.h" +typedef uint64_t c4m_size_t; +typedef struct timespec c4m_duration_t; + typedef c4m_str_t *(*c4m_repr_fn)(c4m_obj_t); typedef void (*c4m_marshal_fn)(c4m_obj_t, c4m_stream_t *, diff --git a/include/core/gc.h b/include/core/gc.h index 3398ce78..361fd5b3 100644 --- a/include/core/gc.h +++ b/include/core/gc.h @@ -244,6 +244,7 @@ extern void c4m_get_heap_bounds(uint64_t *, uint64_t *, uint64_t *); extern void c4m_gc_register_collect_fns(c4m_gc_hook, c4m_gc_hook); extern c4m_alloc_hdr *c4m_find_alloc(void *); extern bool c4m_in_heap(void *); +extern void c4m_header_gc_bits(uint64_t *, c4m_base_obj_t *); #ifdef C4M_GC_STATS uint64_t c4m_get_alloc_counter(); diff --git a/include/core/init.h b/include/core/init.h index 4c759b68..deb687ad 100644 --- a/include/core/init.h +++ b/include/core/init.h @@ -7,36 +7,36 @@ extern char **c4m_stashed_argv; extern char **c4m_stashed_envp; static 
inline char ** -c4m_raw_argv() +c4m_raw_argv(void) { return c4m_stashed_argv; } static inline char ** -c4m_raw_envp() +c4m_raw_envp(void) { return c4m_stashed_envp; } -extern c4m_list_t *c4m_get_program_arguments(); -extern c4m_utf8_t *c4m_get_argv0(); +extern c4m_list_t *c4m_get_program_arguments(void); +extern c4m_utf8_t *c4m_get_argv0(void); extern c4m_utf8_t *c4m_get_env(c4m_utf8_t *); -extern c4m_dict_t *c4m_environment(); +extern c4m_dict_t *c4m_environment(void); extern c4m_utf8_t *c4m_path_search(c4m_utf8_t *, c4m_utf8_t *); -extern c4m_utf8_t *c4m_con4m_root(); -c4m_utf8_t *c4m_system_module_path(); - +extern c4m_utf8_t *c4m_con4m_root(void); +extern c4m_utf8_t *c4m_system_module_path(void); +extern void c4m_add_static_symbols(void); extern c4m_list_t *con4m_path; extern c4m_set_t *con4m_extensions; static inline c4m_list_t * -c4m_get_module_search_path() +c4m_get_module_search_path(void) { return con4m_path; } static inline c4m_set_t * -c4m_get_allowed_file_extensions() +c4m_get_allowed_file_extensions(void) { return con4m_extensions; } diff --git a/include/core/literal.h b/include/core/literal.h index 9ea6cd4a..e4adfa1b 100644 --- a/include/core/literal.h +++ b/include/core/literal.h @@ -1,7 +1,7 @@ #pragma once #include "con4m.h" -extern __uint128_t c4m_raw_int_parse(char *, +extern __uint128_t c4m_raw_int_parse(c4m_utf8_t *, c4m_compile_error_t *, bool *); extern __uint128_t c4m_raw_hex_parse(char *, @@ -15,3 +15,4 @@ extern c4m_compile_error_t c4m_parse_simple_lit(c4m_token_t *, c4m_utf8_t **); extern c4m_builtin_t c4m_base_type_from_litmod(c4m_lit_syntax_t, c4m_utf8_t *); +extern bool c4m_fix_litmod(c4m_token_t *, c4m_pnode_t *); diff --git a/include/core/object.h b/include/core/object.h index 21208403..d7f5812e 100644 --- a/include/core/object.h +++ b/include/core/object.h @@ -121,3 +121,8 @@ extern const c4m_vtable_t c4m_callback_vtable; extern const c4m_vtable_t c4m_flags_vtable; extern const c4m_vtable_t c4m_box_vtable; extern const c4m_vtable_t 
c4m_basic_http_vtable; +extern const c4m_vtable_t c4m_datetime_vtable; +extern const c4m_vtable_t c4m_date_vtable; +extern const c4m_vtable_t c4m_time_vtable; +extern const c4m_vtable_t c4m_size_vtable; +extern const c4m_vtable_t c4m_duration_vtable; diff --git a/include/core/type.h b/include/core/type.h index d7a24b33..3c6a9cb2 100644 --- a/include/core/type.h +++ b/include/core/type.h @@ -315,15 +315,15 @@ c4m_type_typespec() } static inline c4m_type_t * -c4m_type_ipv4() +c4m_type_ip() { return c4m_bi_types[C4M_T_IPV4]; } static inline c4m_type_t * -c4m_type_ipv6() +c4m_type_ipv4() { - return c4m_bi_types[C4M_T_IPV6]; + return c4m_bi_types[C4M_T_IPV4]; } static inline c4m_type_t * diff --git a/include/io/http.h b/include/io/http.h index 64d55e06..4b0b24c8 100644 --- a/include/io/http.h +++ b/include/io/http.h @@ -60,6 +60,14 @@ extern c4m_basic_http_response_t *_c4m_http_upload(c4m_str_t *, c4m_buf_t *, ...); +static inline bool +c4m_validate_url(c4m_utf8_t *candidate) +{ + CURLU *handle = curl_url(); + + return curl_url_set(handle, CURLUPART_URL, candidate->data, 0) == CURLUE_OK; +} + static inline bool c4m_http_op_succeded(c4m_basic_http_response_t *op) { diff --git a/include/util/wrappers.h b/include/util/wrappers.h index 305ef009..92298728 100644 --- a/include/util/wrappers.h +++ b/include/util/wrappers.h @@ -3,9 +3,9 @@ #include "con4m.h" extern c4m_utf32_t *c4m_wrapper_join(c4m_list_t *, const c4m_str_t *); -extern c4m_str_t *c4m_wrapper_hostname(); -extern c4m_str_t *c4m_wrapper_os(); -extern c4m_str_t *c4m_wrapper_arch(); +extern c4m_str_t *c4m_wrapper_hostname(void); +extern c4m_str_t *c4m_wrapper_os(void); +extern c4m_str_t *c4m_wrapper_arch(void); extern c4m_str_t *c4m_wrapper_repr(c4m_obj_t); extern c4m_str_t *c4m_wrapper_to_str(c4m_obj_t); extern void c4m_snap_column(c4m_grid_t *, int64_t); diff --git a/meson.build b/meson.build index d0e479d6..a5f9b352 100644 --- a/meson.build +++ b/meson.build @@ -35,6 +35,14 @@ c_args = [ '-DC4M_MIN_RENDER_WIDTH=' 
+ render_width, ] +if get_option('static_ffi_binding').enabled() + c_args = c_args + ['-DC4M_STATIC_FFI_BINDING'] +elif not get_option('static_ffi_binding').disabled() + if not using_glibc and not using_osx + c_args = c_args + ['-DC4M_STATIC_FFI_BINDING'] + endif +endif + if (host_machine.cpu_family() == 'x86_64' and cc.get_id() == 'clang') c_args = c_args + ['-Wno-atomic-alignment'] endif @@ -208,6 +216,9 @@ c4m_adts = [ 'src/adts/mixed.c', 'src/adts/tuple.c', 'src/adts/ipaddr.c', + 'src/adts/datetime.c', + 'src/adts/size.c', + 'src/adts/duration.c', 'src/adts/callback.c', 'src/adts/streams.c', 'src/adts/flags.c', @@ -328,6 +339,7 @@ test_src = [ threads = dependency('threads') math = cc.find_library('m', required: false) + ffi = cc.find_library( 'ffi', required: true diff --git a/meson.options b/meson.options index ab0d314c..5e9ee04f 100644 --- a/meson.options +++ b/meson.options @@ -127,4 +127,11 @@ option( type: 'feature', value: 'auto', description: 'Run test suite without forking', +) + +option( + 'static_ffi_binding', + type: 'feature', + value: 'auto', + description: 'Whether to statically bind native builtins in std library', ) \ No newline at end of file diff --git a/src/adts/datetime.c b/src/adts/datetime.c new file mode 100644 index 00000000..af6cce82 --- /dev/null +++ b/src/adts/datetime.c @@ -0,0 +1,1203 @@ +#include "con4m.h" + +static inline bool +us_written_date(c4m_utf8_t *input, c4m_date_time_t *result) +{ + c4m_utf8_t *month_part = c4m_new_utf8(""); + int ix = 0; + int l = c4m_str_byte_len(input); + int day = 0; + int year = 0; + int daylen; + int yearlen; + int start_ix; + char *s; + + if (!input || c4m_str_byte_len(input) == 0) { + return false; + } + + s = input->data; + + while (ix < l) { + char c = *s; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + ++ix; + ++s; + } + else { + break; + } + } + + if (ix == 0) { + return false; + } + + month_part = c4m_to_utf8(c4m_str_lower(c4m_str_slice(input, 0, ix))); + + while (ix < l) { + if 
(*s == ' ') { + ++ix; + ++s; + } + else { + break; + } + } + + start_ix = ix; + + while (ix < l) { + switch (*s) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + day *= 10; + day += *s - '0'; + ++ix; + ++s; + continue; + case ' ': + case ',': + break; + default: + return false; + } + break; + } + + daylen = ix - start_ix; + + if (ix < l && *s == ',') { + s++; + ix++; + } + + while (ix < l) { + if (*s != ' ') { + break; + } + s++; + ix++; + } + + start_ix = ix; + while (ix < l) { + if (*s < '0' || *s > '9') { + break; + } + year *= 10; + year += *s - '0'; + ++ix; + ++s; + } + + if (year < 100) { + year += 2000; + } + + if (ix < l) { + return false; + } + + yearlen = ix - start_ix; + + if (daylen == 4 && yearlen == 0) { + year = day; + day = 0; + } + +#define month_str_is(x) !strcmp(month_part->data, x) + do { + if (month_str_is("jan") || month_str_is("january")) { + result->dt.tm_mon = 0; + break; + } + if (month_str_is("feb") || month_str_is("february")) { + result->dt.tm_mon = 1; + break; + } + if (month_str_is("mar") || month_str_is("march")) { + result->dt.tm_mon = 2; + break; + } + if (month_str_is("apr") || month_str_is("april")) { + result->dt.tm_mon = 3; + break; + } + if (month_str_is("may")) { + result->dt.tm_mon = 4; + break; + } + if (month_str_is("jun") || month_str_is("june")) { + result->dt.tm_mon = 5; + break; + } + if (month_str_is("jul") || month_str_is("july")) { + result->dt.tm_mon = 6; + break; + } + if (month_str_is("aug") || month_str_is("august")) { + result->dt.tm_mon = 7; + break; + } + // clang-format off + if (month_str_is("sep") || month_str_is("sept") || + month_str_is("september")) { + // clang-format on + result->dt.tm_mon = 8; + break; + } + if (month_str_is("oct") || month_str_is("october")) { + result->dt.tm_mon = 9; + break; + } + if (month_str_is("nov") || month_str_is("november")) { + result->dt.tm_mon = 10; + break; + } + if (month_str_is("dec") || 
month_str_is("december")) { + result->dt.tm_mon = 11; + break; + } + return false; + } while (true); + + result->have_month = 1; + + if (day != 0) { + if (day > 31) { + return false; + } + result->have_day = 1; + result->dt.tm_mday = day; + } + if (year != 0) { + result->have_year = 1; + result->dt.tm_year = year - 1900; + } + + if (day > 31) { + return false; + } + + if (result->dt.tm_mon == 1 && day == 30) { + return false; + } + + if (day == 31) { + switch (result->dt.tm_mon) { + case 1: + case 3: + case 5: + case 8: + case 10: + return false; + default: + break; + } + } + + return true; +} + +static inline bool +other_written_date(c4m_utf8_t *input, c4m_date_time_t *result) +{ + int l = c4m_str_byte_len(input); + char *s = input->data; + char *e = s + l; + c4m_utf8_t *day; + + while (s < e) { + char c = *s; + + if (c < '0' || c > '9') { + break; + } + s++; + } + + if (s == input->data) { + return false; + } + + day = c4m_to_utf8(c4m_str_slice(input, 0, s - input->data)); + + while (s < e) { + if (*s != ' ') { + break; + } + s++; + } + + char *month_part = s; + + while (s < e) { + char c = *s; + + if (c < 'A' || c > 'z' || (c > 'Z' && c < 'a')) { + break; + } + s++; + } + if (s == month_part) { + return false; + } + + int mstart = month_part - input->data; + int mend = s - input->data; + + c4m_utf8_t *mo = c4m_to_utf8(c4m_str_slice(input, mstart, mend)); + c4m_utf8_t *year = c4m_to_utf8(c4m_str_slice(input, mend, l)); + c4m_utf8_t *americanized = c4m_cstr_format("{} {}{}", mo, day, year); + + return us_written_date(americanized, result); +} + +#define WAS_DIGIT(x) \ + if (x < 0 || x > 9) { \ + return false; \ + } + +#define ONE_DIGIT(n) \ + if (s == e) { \ + return false; \ + } \ + n = *s++ - '0'; \ + WAS_DIGIT(n) + +#define PARSE_MONTH() \ + ONE_DIGIT(m); \ + ONE_DIGIT(tmp); \ + m *= 10; \ + m += tmp; \ + \ + if (m > 12) { \ + return false; \ + } \ + \ + result->dt.tm_mon = m - 1; \ + result->have_month = true + +#define PARSE_DAY() \ + ONE_DIGIT(d); \ + if (d 
> 3) { \ + return false; \ + } \ + d *= 10; \ + ONE_DIGIT(tmp); \ + d += tmp; \ + switch (m) { \ + case 2: \ + if (d > 29) { \ + return false; \ + } \ + break; \ + case 4: \ + case 6: \ + case 9: \ + case 11: \ + if (d > 30) { \ + return false; \ + } \ + break; \ + default: \ + if (d > 31) { \ + return false; \ + } \ + break; \ + } \ + \ + result->dt.tm_mday = d; \ + result->have_day = true + +#define PARSE_YEAR4() \ + ONE_DIGIT(y); \ + y *= 10; \ + ONE_DIGIT(tmp); \ + y += tmp; \ + y *= 10; \ + ONE_DIGIT(tmp); \ + y += tmp; \ + y *= 10; \ + ONE_DIGIT(tmp); \ + y += tmp; \ + \ + result->dt.tm_year = y - 1900; \ + result->have_year = true + +#define REQUIRE_DASH() \ + if (*s++ != '-') { \ + return false; \ + } + +static inline bool +iso_date(c4m_utf8_t *input, c4m_date_time_t *result) +{ + int l = c4m_str_byte_len(input); + char *s = input->data; + char *e = s + l; + int m = 0; + int d = 0; + int y = 0; + bool elided_dashes = false; + int tmp; + + switch (l) { + case 4: + REQUIRE_DASH(); + REQUIRE_DASH(); + PARSE_MONTH(); + return true; + + case 7: + REQUIRE_DASH(); + REQUIRE_DASH(); + PARSE_MONTH(); + REQUIRE_DASH(); + PARSE_DAY(); + return true; + case 8: + elided_dashes = true; + // fallthrough; + case 10: + PARSE_YEAR4(); + + if (!elided_dashes) { + REQUIRE_DASH(); + } + PARSE_MONTH(); + if (!elided_dashes) { + REQUIRE_DASH(); + } + PARSE_DAY(); + return true; + + default: + return false; + } +} + +static bool +to_native_date(c4m_utf8_t *i, c4m_date_time_t *r) +{ + if (iso_date(i, r) || other_written_date(i, r) || us_written_date(i, r)) { + return true; + } + + return false; +} + +#define END_OR_TZ() \ + if (s == e) { \ + return true; \ + } \ + switch (*s) { \ + case 'Z': \ + case 'z': \ + case '+': \ + case '-': \ + break; \ + default: \ + return false; \ + } + +static bool +to_native_time(c4m_utf8_t *input, c4m_date_time_t *result) +{ + int hr = 0; + int min = 0; + int sec = 0; + int l = c4m_str_byte_len(input); + char *s = input->data; + char *e = s + l; + 
char tmp; + + ONE_DIGIT(hr); + if (*s != ':') { + hr *= 10; + ONE_DIGIT(tmp); + hr += tmp; + } + if (*s++ != ':') { + return false; + } + if (hr > 23) { + return false; + } + result->dt.tm_hour = hr; + + ONE_DIGIT(min); + min *= 10; + ONE_DIGIT(tmp); + min += tmp; + + if (min > 59) { + return false; + } + + result->dt.tm_min = min; + result->have_time = true; + + if (s == e) { + return true; + } + if (*s == ':') { + s++; + ONE_DIGIT(sec); + sec *= 10; + ONE_DIGIT(tmp); + sec += tmp; + if (sec > 60) { + return false; + } + result->have_sec = true; + result->dt.tm_sec = sec; + + if (s == e) { + return true; + } + if (*s == '.') { + result->fracsec = 0; + result->have_frac_sec = true; + + ONE_DIGIT(result->fracsec); + while (*s >= '0' && *s <= '9') { + result->fracsec *= 10; + ONE_DIGIT(tmp); + result->fracsec += tmp; + } + } + } + + while (s < e && *s == ' ') { + s++; + } + + if (s == e) { + return true; + } + + switch (*s) { + case 'a': + if (*++s != 'm') { + return false; + } + ++s; + END_OR_TZ(); + break; + case 'A': + ++s; + if (*s != 'm' && *s != 'M') { + return false; + } + ++s; + END_OR_TZ(); + break; + case 'p': + if (*++s != 'm') { + return false; + } + ++s; + if (result->dt.tm_hour <= 11) { + result->dt.tm_hour += 12; + } + END_OR_TZ(); + break; + case 'P': + ++s; + if (*s != 'm' && *s != 'M') { + return false; + } + ++s; + if (result->dt.tm_hour <= 11) { + result->dt.tm_hour += 12; + } + END_OR_TZ(); + break; + case 'Z': + case 'z': + case '+': + case '-': + break; + default: + return false; + } + + result->have_offset = true; + + if (*s == 'Z' || *s == 'z') { + s++; + } + + if (s == e) { + return true; + } + + int mul = 1; + int offset = 0; + + if (*s == '-') { + mul = -1; + s++; + } + else { + if (*s == '+') { + s++; + } + } + + if (s == e) { + return false; + } + + ONE_DIGIT(offset); + offset *= mul; + + if (s != e) { + ONE_DIGIT(tmp); + offset *= 10; + offset += tmp; + } + + // This range covers it; + // the true *historic range is -15:56:00 - 15:13:42 
+ // and in practice should generally be -12 to +14 now. + + if (offset < -15 || offset > 15) { + return false; + } + + result->dt.tm_gmtoff = offset * 60 * 60; + + if (s == e) { + return true; + } + + if (*s == ':') { + s++; + } + + offset = 0; + ONE_DIGIT(offset); + ONE_DIGIT(tmp); + offset *= 10; + offset += tmp; + if (offset > 59) { + return false; + } + + result->dt.tm_gmtoff += offset * 60; + + if (s == e) { + return true; + } + + if (*s++ != ':') { + return false; + } + + offset = 0; + ONE_DIGIT(offset); + ONE_DIGIT(tmp); + offset *= 10; + offset += tmp; + if (offset > 60) { + return false; + } + + result->dt.tm_gmtoff += offset; + + return s == e; +} + +static bool +to_native_date_and_or_time(c4m_utf8_t *input, c4m_date_time_t *result) +{ + int ix = c4m_str_find(input, c4m_new_utf8("T")); + bool im_exhausted = false; + + if (ix == -1) { + ix = c4m_str_find(input, c4m_new_utf8("t")); + } + +try_a_slice: + if (ix != -1) { + int l = c4m_str_codepoint_len(input); + c4m_utf8_t *date = c4m_to_utf8(c4m_str_slice(input, 0, ix)); + c4m_utf8_t *time = c4m_to_utf8(c4m_str_slice(input, ix + 1, l)); + + if (iso_date(date, result) && to_native_time(time, result)) { + return true; + } + + if (to_native_date(date, result) && to_native_time(time, result)) { + return true; + } + + if (im_exhausted) { + // We've been up here twice, why loop forever when it isn't + // going to work out? + return false; + } + } + + // Otherwise, first look for the first colon after a space. 
+ ix = c4m_str_find(input, c4m_new_utf8(":")); + + if (ix != -1) { + int last_space = -1; + c4m_utf32_t *as_32 = c4m_to_utf32(input); + c4m_codepoint_t *cptr = (c4m_codepoint_t *)as_32->data; + + for (int i = 0; i < ix; i++) { + if (*cptr++ == ' ') { + last_space = i; + } + } + + if (last_space != -1) { + ix = last_space; + im_exhausted = true; + goto try_a_slice; + } + } + + if (to_native_date(input, result)) { + return true; + } + + memset(result, 0, sizeof(c4m_date_time_t)); + + if (to_native_time(input, result)) { + return true; + } + + return false; +} + +#define YEAR_NOT_SET 0x7fffffff + +static void +datetime_init(c4m_date_time_t *self, va_list args) +{ + c4m_utf8_t *to_parse = NULL; + int32_t hr = -1; + int32_t min = -1; + int32_t sec = -1; + int32_t month = -1; + int32_t day = -1; + int32_t year = YEAR_NOT_SET; + int32_t offset_hr = -100; + int32_t offset_min = 0; + int32_t offset_sec = 0; + int64_t fracsec = -1; + + c4m_karg_va_init(args); + c4m_kw_ptr("to_parse", args); + c4m_kw_int32("hr", hr); + c4m_kw_int32("min", min); + c4m_kw_int32("sec", sec); + c4m_kw_int32("month", month); + c4m_kw_int32("day", day); + c4m_kw_int32("year", year); + c4m_kw_int32("offset_hr", offset_hr); + c4m_kw_int32("offset_min", offset_min); + c4m_kw_int32("offset_sec", offset_sec); + c4m_kw_int64("fracsec", fracsec); + + if (to_parse != NULL) { + to_parse = c4m_to_utf8(to_parse); + if (!to_native_date_and_or_time(to_parse, self)) { + C4M_CRAISE("Invalid date-time literal."); + } + return; + } + + self->dt.tm_isdst = -1; + + if (hr != -1) { + if (hr < 0 || hr > 23) { + C4M_CRAISE("Invalid hour (must be 0 - 23)"); + } + self->dt.tm_hour = hr; + self->have_time = true; + + if (min != -1) { + if (min < 0 || min > 59) { + C4M_CRAISE("Invalid minute (must be 0 - 59)"); + } + self->dt.tm_min = min; + } + + if (sec != -1) { + if (sec < 0 || sec > 61) { + C4M_CRAISE("Invalid second (must be 0 - 60)"); + } + self->dt.tm_sec = sec; + } + + if (fracsec > 0) { + self->fracsec = fracsec; + 
} + } + + if (year != YEAR_NOT_SET) { + self->have_year = true; + self->dt.tm_year = year; + } + + if (month != -1) { + if (month < 1 || month > 12) { + C4M_CRAISE("Invalid month (must be 1 - 12)"); + } + self->dt.tm_mon = month - 1; + self->have_month = true; + } + + if (day != -1) { + if (day < 1 || day > 31) { + C4M_CRAISE("Invalid day of month"); + } + self->dt.tm_mday = day; + self->have_day = true; + } + + int offset = 0; + + if (offset_hr >= -15 && offset_hr <= 15) { + offset = offset_hr * 60 * 60; + self->have_offset = true; + } + + if (offset_min > 0 && offset_min < 60) { + offset += offset_min * 60; + self->have_offset = true; + } + + if (offset_sec > 0 && offset_sec <= 60) { + offset += offset_min; + } + + self->dt.tm_gmtoff = offset; +} + +#define DT_HAVE_TIME 1 +#define DT_HAVE_SEC 2 +#define DT_HAVE_FRAC 4 +#define DT_HAVE_MO 8 +#define DT_HAVE_Y 16 +#define DT_HAVE_D 32 +#define DT_HAVE_OFF 64 + +static void +datetime_marshal(c4m_date_time_t *self, + c4m_stream_t *s, + c4m_dict_t *memos, + int64_t *mid) +{ + c4m_marshal_i32(self->dt.tm_sec, s); + c4m_marshal_i32(self->dt.tm_min, s); + c4m_marshal_i32(self->dt.tm_hour, s); + c4m_marshal_i32(self->dt.tm_mday, s); + c4m_marshal_i32(self->dt.tm_mon, s); + c4m_marshal_i32(self->dt.tm_year, s); + c4m_marshal_i32(self->dt.tm_wday, s); + c4m_marshal_i32(self->dt.tm_yday, s); + c4m_marshal_i32(self->dt.tm_isdst, s); + c4m_marshal_i64(self->dt.tm_gmtoff, s); + c4m_marshal_cstring(self->dt.tm_zone, s); + c4m_marshal_u64(self->fracsec, s); + + int32_t flags = 0; + + if (self->have_time) { + flags |= DT_HAVE_TIME; + } + + if (self->have_sec) { + flags |= DT_HAVE_SEC; + } + + if (self->have_frac_sec) { + flags |= DT_HAVE_FRAC; + } + + if (self->have_month) { + flags |= DT_HAVE_MO; + } + + if (self->have_year) { + flags |= DT_HAVE_Y; + } + + if (self->have_day) { + flags |= DT_HAVE_D; + } + + if (self->have_offset) { + flags |= DT_HAVE_OFF; + } + + c4m_marshal_i32(flags, s); +} + +static void 
+datetime_unmarshal(c4m_date_time_t *self, c4m_stream_t *s, c4m_dict_t *memos) +{ + self->dt.tm_sec = c4m_unmarshal_i32(s); + self->dt.tm_min = c4m_unmarshal_i32(s); + self->dt.tm_hour = c4m_unmarshal_i32(s); + self->dt.tm_mday = c4m_unmarshal_i32(s); + self->dt.tm_mon = c4m_unmarshal_i32(s); + self->dt.tm_year = c4m_unmarshal_i32(s); + self->dt.tm_wday = c4m_unmarshal_i32(s); + self->dt.tm_yday = c4m_unmarshal_i32(s); + self->dt.tm_isdst = c4m_unmarshal_i32(s); + self->dt.tm_gmtoff = c4m_unmarshal_i64(s); + self->dt.tm_zone = c4m_unmarshal_cstring(s); + self->fracsec = c4m_unmarshal_u64(s); + int32_t flags = c4m_unmarshal_i32(s); + + if (!flags) { + return; + } + + if (flags & DT_HAVE_TIME) { + self->have_time = true; + } + + if (flags & DT_HAVE_SEC) { + self->have_sec = true; + } + + if (flags & DT_HAVE_FRAC) { + self->have_frac_sec = true; + } + + if (flags & DT_HAVE_MO) { + self->have_month = true; + } + + if (flags & DT_HAVE_Y) { + self->have_year = true; + } + + if (flags & DT_HAVE_D) { + self->have_day = true; + } + + if (flags & DT_HAVE_OFF) { + self->have_offset = true; + } +} + +static c4m_str_t * +datetime_repr(c4m_date_time_t *self) +{ + // TODO: this could use a lot more logic to make it more sane + // when bits aren't fully filled out. + // + // Also, for now we are just omitting the fractional second. 
+ + char *fmt = NULL; + + if (self->have_time) { + if (self->have_day || self->have_month || self->have_year) { + if (self->have_offset) { + fmt = "%Y-%m-%dT%H:%M:%S%z"; + } + else { + fmt = "%Y-%m-%dT%H:%M:%S"; + } + } + else { + fmt = "%H:%M:%S"; + } + } + else { + fmt = "%Y-%m-%d"; + } + + char buf[1024]; + + if (!strftime(buf, 1024, fmt, &self->dt)) { + return c4m_new_utf8("<>"); + } + + return c4m_new_utf8(buf); +} + +static c4m_date_time_t * +datetime_lit(c4m_utf8_t *s, + c4m_lit_syntax_t st, + c4m_utf8_t *mod, + c4m_compile_error_t *err) +{ + c4m_date_time_t *result = c4m_new(c4m_type_datetime()); + + if (!to_native_date_and_or_time(s, result)) { + *err = c4m_err_invalid_dt_spec; + return NULL; + } + + return result; +} + +static bool +datetime_can_coerce_to(c4m_type_t *my_type, c4m_type_t *target_type) +{ + switch (target_type->details->base_type->typeid) { + case C4M_T_DATETIME: + case C4M_T_DATE: + case C4M_T_TIME: + return true; + default: + return false; + } +} + +static c4m_date_time_t * +datetime_coerce_to(c4m_date_time_t *dt) +{ + return dt; +} + +static c4m_date_time_t * +datetime_copy(c4m_date_time_t *dt) +{ + c4m_date_time_t *result = c4m_new(c4m_get_my_type(dt)); + + memcpy(result, dt, sizeof(c4m_date_time_t)); + + return result; +} + +static c4m_utf8_t * +datetime_format(c4m_date_time_t *dt, c4m_fmt_spec_t *spec) +{ + char fmt_str[3] = {'%', 'F', 0}; + char buf[1024]; + + switch (spec->type) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'F': + case 'G': + case 'H': + case 'I': + case 'M': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'g': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'p': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '+': + fmt_str[1] = (char)spec->type; + if (!strftime(buf, 1024, fmt_str, &dt->dt)) { + 
C4M_CRAISE("Internal error (when calling strftime)"); + } + + return c4m_new_utf8(buf); + + default: + C4M_CRAISE("Invalid format specifier for Datetime object"); + } +} + +static c4m_utf8_t * +date_format(c4m_date_time_t *dt, c4m_fmt_spec_t *spec) +{ + char fmt_str[3] = {'%', 'F', 0}; + char buf[1024]; + + switch (spec->type) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'F': + case 'G': + case 'U': + case 'V': + case 'W': + case 'Y': + case 'a': + case 'b': + case 'd': + case 'e': + case 'g': + case 'j': + case 'm': + case 'n': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + fmt_str[1] = (char)spec->type; + if (!strftime(buf, 1024, fmt_str, &dt->dt)) { + C4M_CRAISE("Internal error (when calling strftime)"); + } + + return c4m_new_utf8(buf); + + default: + C4M_CRAISE("Invalid format specifier for Date object"); + } +} + +static c4m_utf8_t * +time_format(c4m_date_time_t *dt, c4m_fmt_spec_t *spec) +{ + char fmt_str[3] = {'%', 'F', 0}; + char buf[1024]; + + switch (spec->type) { + case 'H': + case 'I': + case 'M': + case 'R': + case 'S': + case 'T': + case 'X': + case 'Z': + case 'k': + case 'l': + case 'n': + case 'p': + case 'r': + case 's': + case 't': + case 'z': + fmt_str[1] = (char)spec->type; + if (!strftime(buf, 1024, fmt_str, &dt->dt)) { + C4M_CRAISE("Internal error (when calling strftime)"); + } + + return c4m_new_utf8(buf); + + default: + C4M_CRAISE("Invalid format specifier for Time object"); + } +} + +static c4m_date_time_t * +date_lit(c4m_utf8_t *s, + c4m_lit_syntax_t st, + c4m_utf8_t *mod, + c4m_compile_error_t *err) +{ + c4m_date_time_t *result = c4m_new(c4m_type_date()); + + if (!to_native_date(s, result)) { + *err = c4m_err_invalid_date_spec; + return NULL; + } + + return result; +} + +static c4m_date_time_t * +time_lit(c4m_utf8_t *s, + c4m_lit_syntax_t st, + c4m_utf8_t *mod, + c4m_compile_error_t *err) +{ + c4m_date_time_t *result = c4m_new(c4m_type_time()); + + if (!to_native_time(s, result)) { + *err = 
c4m_err_invalid_time_spec; + return NULL; + } + + return result; +} + +const c4m_vtable_t c4m_datetime_vtable = { + .num_entries = C4M_BI_NUM_FUNCS, + .methods = { + [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)datetime_init, + [C4M_BI_REPR] = (c4m_vtable_entry)datetime_repr, + [C4M_BI_FORMAT] = (c4m_vtable_entry)datetime_format, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)datetime_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)datetime_unmarshal, + [C4M_BI_COERCIBLE] = (c4m_vtable_entry)datetime_can_coerce_to, + [C4M_BI_COERCE] = (c4m_vtable_entry)datetime_coerce_to, + [C4M_BI_FROM_LITERAL] = (c4m_vtable_entry)datetime_lit, + [C4M_BI_COPY] = (c4m_vtable_entry)datetime_copy, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)c4m_header_gc_bits, + [C4M_BI_FINALIZER] = NULL, + }, +}; + +const c4m_vtable_t c4m_date_vtable = { + .num_entries = C4M_BI_NUM_FUNCS, + .methods = { + [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)datetime_init, + [C4M_BI_REPR] = (c4m_vtable_entry)datetime_repr, + [C4M_BI_FORMAT] = (c4m_vtable_entry)date_format, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)datetime_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)datetime_unmarshal, + [C4M_BI_COERCIBLE] = (c4m_vtable_entry)datetime_can_coerce_to, + [C4M_BI_COERCE] = (c4m_vtable_entry)datetime_coerce_to, + [C4M_BI_FROM_LITERAL] = (c4m_vtable_entry)date_lit, + [C4M_BI_COPY] = (c4m_vtable_entry)datetime_copy, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)c4m_header_gc_bits, + [C4M_BI_FINALIZER] = NULL, + }, +}; + +const c4m_vtable_t c4m_time_vtable = { + .num_entries = C4M_BI_NUM_FUNCS, + .methods = { + [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)datetime_init, + [C4M_BI_REPR] = (c4m_vtable_entry)datetime_repr, + [C4M_BI_FORMAT] = (c4m_vtable_entry)time_format, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)datetime_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)datetime_unmarshal, + [C4M_BI_COERCIBLE] = (c4m_vtable_entry)datetime_can_coerce_to, + [C4M_BI_COERCE] = (c4m_vtable_entry)datetime_coerce_to, + [C4M_BI_FROM_LITERAL] = 
(c4m_vtable_entry)time_lit, + [C4M_BI_COPY] = (c4m_vtable_entry)datetime_copy, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)c4m_header_gc_bits, + [C4M_BI_FINALIZER] = NULL, + }, +}; diff --git a/src/adts/duration.c b/src/adts/duration.c new file mode 100644 index 00000000..5a9c58ba --- /dev/null +++ b/src/adts/duration.c @@ -0,0 +1,603 @@ +#include "con4m.h" + +c4m_duration_t * +c4m_now(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_REALTIME, result); + + return result; +} + +c4m_duration_t * +c4m_timestamp(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_MONOTONIC, result); + + return result; +} + +c4m_duration_t * +c4m_process_cpu(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, result); + + return result; +} + +c4m_duration_t * +c4m_thread_cpu(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_THREAD_CPUTIME_ID, result); + + return result; +} + +#if defined(__MACH__) +c4m_duration_t * +c4m_uptime(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_UPTIME_RAW, result); + + return result; +} +#elif defined(__linux__) +// In POSIX, the MONOTONIC clock's reference frame is arbitrary. +// In Linux, it's uptime, and this is higher precision than using +// sysinfo(), which only has second resolution. +c4m_duration_t * +c4m_uptime(void) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + clock_gettime(CLOCK_MONOTONIC, result); + + return result; +} +#else +#error "Unsupported system." 
+#endif + +static c4m_duration_t *monotonic_start; + +void +c4m_init_program_timestamp(void) +{ + c4m_gc_register_root(&monotonic_start, 1); + monotonic_start = c4m_timestamp(); +} + +c4m_duration_t * +c4m_program_clock(void) +{ + c4m_duration_t *now = c4m_timestamp(); + + return c4m_duration_diff(now, monotonic_start); +} + +static c4m_list_t * +duration_atomize(c4m_utf8_t *s) +{ + c4m_utf8_t *one; + c4m_list_t *result = c4m_list(c4m_type_utf8()); + int l = c4m_str_byte_len(s); + char *p = s->data; + char *end = p + l; + int start = 0; + int cur = 0; + + while (p < end) { + while (p < end && isdigit(*p)) { + cur++; + p++; + } + if (start == cur) { + return NULL; + } + one = c4m_str_slice(s, start, cur); + c4m_list_append(result, one); + + while (p < end && (isspace(*p) || *p == ',')) { + cur++; + p++; + } + + start = cur; + + while (p < end && !isdigit(*p) && *p != ' ' && *p != ',') { + cur++; + p++; + } + if (start == cur) { + return NULL; + } + + one = c4m_str_slice(s, start, cur); + c4m_list_append(result, one); + + while (p < end && (isspace(*p) || *p == ',')) { + cur++; + p++; + } + start = cur; + } + + return result; +} + +#define SEC_PER_MIN 60 +#define SEC_PER_HR (SEC_PER_MIN * 60) +#define SEC_PER_DAY (SEC_PER_HR * 24) +#define SEC_PER_WEEK (SEC_PER_DAY * 7) +#define SEC_PER_YEAR (SEC_PER_DAY * 365) +#define NS_PER_MS 1000000 +#define NS_PER_US 1000 +#define C4M_MAX_UINT (~0ULL) + +static inline int64_t +tv_sec_multiple(c4m_utf8_t *s) +{ + // clang-format off + switch (s->data[0]) { + case 's': + if (!strcmp(s->data, "s") || + !strcmp(s->data, "sec") || + !strcmp(s->data, "secs") || + !strcmp(s->data, "seconds")) { + return 1; + } + return 0; + case 'm': + if (!strcmp(s->data, "m") || + !strcmp(s->data, "min") || + !strcmp(s->data, "mins") || + !strcmp(s->data, "minutes")) { + return SEC_PER_MIN; + } + return 0; + case 'h': + if (!strcmp(s->data, "h") || + !strcmp(s->data, "hr") || + !strcmp(s->data, "hrs") || + !strcmp(s->data, "hours")) { + return 
SEC_PER_HR; + } + return 0; + case 'd': + if (!strcmp(s->data, "d") || + !strcmp(s->data, "day") || + !strcmp(s->data, "days")) { + return SEC_PER_DAY; + } + return 0; + case 'w': + if (!strcmp(s->data, "w") || + !strcmp(s->data, "wk") || + !strcmp(s->data, "wks") || + !strcmp(s->data, "week") || + !strcmp(s->data, "weeks")) { + return SEC_PER_WEEK; + } + return 0; + case 'y': + if (!strcmp(s->data, "y") || + !strcmp(s->data, "yr") || + !strcmp(s->data, "yrs") || + !strcmp(s->data, "year") || + !strcmp(s->data, "years")) { + return SEC_PER_YEAR; + } + return 0; + default: + return 0; + } +} + +static inline int64_t +tv_nano_multiple(c4m_utf8_t *s) +{ + // For sub-seconds, we convert to nanoseconds; 0 means the unit name was not recognized. + switch (s->data[0]) { + case 'n': + if (!strcmp(s->data, "n") || + !strcmp(s->data, "ns") || + !strcmp(s->data, "nsec") || + !strcmp(s->data, "nsecs") || + !strcmp(s->data, "nanosec") || + !strcmp(s->data, "nanosecs") || + !strcmp(s->data, "nanosecond") || + !strcmp(s->data, "nanoseconds")) { + return 1; + } + return 0; + case 'm': + if (!strcmp(s->data, "m") || + !strcmp(s->data, "ms") || + !strcmp(s->data, "msec") || + !strcmp(s->data, "msecs") || + !strcmp(s->data, "millisec") || + !strcmp(s->data, "millisecs") || + !strcmp(s->data, "millisecond") || + !strcmp(s->data, "milliseconds")) { + return NS_PER_MS; + } + if (!strcmp(s->data, "microsec") || + !strcmp(s->data, "microsecs") || + !strcmp(s->data, "microsecond") || + !strcmp(s->data, "microseconds")) { + return NS_PER_US; + } + return 0; + case 'u': + if (!strcmp(s->data, "u") || + !strcmp(s->data, "us") || + !strcmp(s->data, "usec") || + !strcmp(s->data, "usecs") || + !strcmp(s->data, "usecond") || + !strcmp(s->data, "useconds")) { + return NS_PER_US; + } + return 0; + default: + return 0; + } + // clang-format on +} + +static bool +str_to_duration(c4m_utf8_t *s, + struct timespec *ts, + c4m_compile_error_t *err) +{ + c4m_list_t *atoms = duration_atomize(s); + + if (!atoms) { + *err = 
c4m_err_invalid_duration_lit; + return false; + } + + int i = 0; + int n = c4m_list_len(atoms); + __uint128_t sec = 0; + __uint128_t sub = 0; + __uint128_t tmp; + int64_t multiple; + bool neg; + c4m_utf8_t *tmpstr; + + while (i < n) { + tmp = c4m_raw_int_parse(c4m_list_get(atoms, i++, NULL), err, &neg); + + if (neg) { + *err = c4m_err_invalid_duration_lit; + return false; + } + + if (*err != c4m_err_no_error) { + *err = c4m_err_invalid_duration_lit; + return false; + } + + if (tmp > C4M_MAX_UINT) { + *err = c4m_err_parse_lit_overflow; + return false; + } + + tmpstr = c4m_list_get(atoms, i++, NULL); + multiple = tv_sec_multiple(tmpstr); + + if (multiple) { + sec += (multiple * tmp); + if (sec > C4M_MAX_UINT) { + *err = c4m_err_parse_lit_overflow; + return false; + } + continue; + } + + multiple = tv_nano_multiple(tmpstr); + if (!multiple) { + *err = c4m_err_invalid_duration_lit; + return false; + } + + sub += (multiple * tmp); + if (sub > C4M_MAX_UINT) { + *err = c4m_err_parse_lit_overflow; + return false; + } + } + + ts->tv_sec = sec; + ts->tv_nsec = sub; + + return true; +} + +static void +duration_init(struct timespec *ts, va_list args) +{ + c4m_utf8_t *to_parse = NULL; + int64_t sec = -1; + int64_t nanosec = -1; + c4m_compile_error_t err = c4m_err_no_error; + + c4m_karg_va_init(args); + c4m_kw_ptr("to_parse", to_parse); + c4m_kw_uint64("sec", sec); + c4m_kw_uint64("nanosec", nanosec); + + if (to_parse) { + if (!str_to_duration(to_parse, ts, &err)) { + C4M_RAISE(c4m_err_code_to_str(err)); + } + return; + } + + if (sec < 0) { + sec = 0; + } + if (nanosec < 0) { + nanosec = 0; + } + + ts->tv_sec = sec; + ts->tv_nsec = nanosec; + return; +} + +static c4m_utf8_t * +repr_sec(int64_t n) +{ + c4m_list_t *l = c4m_list(c4m_type_utf8()); + c4m_utf8_t *s; + int64_t tmp; + + if (n >= SEC_PER_YEAR) { + tmp = n / SEC_PER_YEAR; + if (tmp > 1) { + s = c4m_cstr_format("{} years", c4m_box_u64(tmp)); + } + else { + s = c4m_new_utf8("1 year"); + } + + c4m_list_append(l, s); + n -= (tmp * SEC_PER_YEAR); + } 
+ + if (n >= SEC_PER_WEEK) { + tmp = n / SEC_PER_WEEK; + if (tmp > 1) { + s = c4m_cstr_format("{} weeks", c4m_box_u64(tmp)); + } + else { + s = c4m_new_utf8("1 week"); + } + + c4m_list_append(l, s); + n -= (tmp * SEC_PER_WEEK); + } + + if (n >= SEC_PER_DAY) { + tmp = n / SEC_PER_DAY; + if (tmp > 1) { + s = c4m_cstr_format("{} days", c4m_box_u64(tmp)); + } + else { + s = c4m_new_utf8("1 day"); + } + + c4m_list_append(l, s); + n -= (tmp * SEC_PER_DAY); + } + + if (n >= SEC_PER_HR) { + tmp = n / SEC_PER_HR; + if (tmp > 1) { + s = c4m_cstr_format("{} hours", c4m_box_u64(tmp)); + } + else { + s = c4m_new_utf8("1 hour"); + } + + c4m_list_append(l, s); + n -= (tmp * SEC_PER_HR); + } + + if (n >= SEC_PER_MIN) { + tmp = n / SEC_PER_MIN; + if (tmp > 1) { + s = c4m_cstr_format("{} minutes", c4m_box_u64(tmp)); + } + else { + s = c4m_new_utf8("1 minute"); + } + + c4m_list_append(l, s); + n -= (tmp * SEC_PER_MIN); + } + + if (n) { + if (n == 1) { + s = c4m_new_utf8("1 second"); + } + else { + s = c4m_cstr_format("{} seconds", c4m_box_u64(n)); + } + c4m_list_append(l, s); + } + + return c4m_to_utf8(c4m_str_join(l, c4m_new_utf8(", "))); +} + +static c4m_utf8_t * +repr_ns(int64_t n) +{ + int64_t ms = n / NS_PER_MS; + int64_t ns = n - (ms * NS_PER_MS); + + if (ms && ns) { + return c4m_cstr_format("{} msec, {} nsec", + c4m_box_u64(ms), + c4m_box_u64(ns)); + } + if (ms) { + return c4m_cstr_format("{} msec", c4m_box_u64(ms)); + } + + return c4m_cstr_format("{} nsec", c4m_box_u64(ns)); +} + +static c4m_utf8_t * +duration_repr(c4m_duration_t *ts) +{ + // TODO: Do better. 
+ + if (!ts->tv_sec && !ts->tv_nsec) { + return c4m_new_utf8("0 seconds"); + } + + if (ts->tv_sec && ts->tv_nsec) { + return c4m_cstr_format("{} {}", + repr_sec(ts->tv_sec), + repr_ns(ts->tv_nsec)); + } + + if (ts->tv_sec) { + return repr_sec(ts->tv_sec); + } + return repr_ns(ts->tv_nsec); +} + +static void +duration_marshal(c4m_duration_t *ts, + c4m_stream_t *s, + c4m_dict_t *m, + int64_t d) +{ + c4m_marshal_u64(ts->tv_sec, s); + c4m_marshal_u64(ts->tv_nsec, s); +} + +static void +duration_unmarshal(c4m_duration_t *ts, c4m_stream_t *s, c4m_dict_t *m) +{ + ts->tv_sec = c4m_unmarshal_u64(s); + ts->tv_nsec = c4m_unmarshal_u64(s); +} + +static bool +duration_eq(c4m_duration_t *t1, c4m_duration_t *t2) +{ + return (t1->tv_sec == t2->tv_sec && t1->tv_nsec == t2->tv_nsec); +} + +static bool +duration_gt(c4m_duration_t *t1, c4m_duration_t *t2) +{ + if (t1->tv_sec > t2->tv_sec) { + return true; + } + if (t1->tv_sec < t2->tv_sec) { + return false; + } + + return t1->tv_nsec > t2->tv_nsec; +} + +static bool +duration_lt(c4m_duration_t *t1, c4m_duration_t *t2) +{ + if (t1->tv_sec < t2->tv_sec) { + return true; + } + if (t1->tv_sec > t2->tv_sec) { + return false; + } + + return t1->tv_nsec < t2->tv_nsec; +} + +c4m_duration_t * +c4m_duration_diff(c4m_duration_t *t1, c4m_duration_t *t2) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + c4m_duration_t *b, *l; + + if (duration_gt(t1, t2)) { + b = t1; + l = t2; + } + else { + b = t2; + l = t1; + } + result->tv_nsec = b->tv_nsec - l->tv_nsec; + result->tv_sec = b->tv_sec - l->tv_sec; + + if (result->tv_nsec < 0) { + result->tv_nsec += 1000000000; + result->tv_sec -= 1; + } + + return result; +} + +static c4m_duration_t * +duration_add(c4m_duration_t *t1, c4m_duration_t *t2) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + result->tv_nsec = t1->tv_nsec + t2->tv_nsec; + result->tv_sec = t1->tv_sec + t2->tv_sec; + + if (result->tv_nsec >= 1000000000) { + result->tv_sec += 1; + result->tv_nsec -= 1000000000; + } + + return 
result; +} + +static c4m_duration_t * +duration_lit(c4m_utf8_t *s, + c4m_lit_syntax_t st, + c4m_utf8_t *mod, + c4m_compile_error_t *err) +{ + c4m_duration_t *result = c4m_new(c4m_type_duration()); + + if (str_to_duration(s, result, err)) { + return result; + } + + return NULL; +} + +const c4m_vtable_t c4m_duration_vtable = { + .num_entries = C4M_BI_NUM_FUNCS, + .methods = { + [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)duration_init, + [C4M_BI_REPR] = (c4m_vtable_entry)duration_repr, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)duration_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)duration_unmarshal, + [C4M_BI_FROM_LITERAL] = (c4m_vtable_entry)duration_lit, + [C4M_BI_EQ] = (c4m_vtable_entry)duration_eq, + [C4M_BI_LT] = (c4m_vtable_entry)duration_lt, + [C4M_BI_GT] = (c4m_vtable_entry)duration_gt, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)c4m_header_gc_bits, + [C4M_BI_ADD] = (c4m_vtable_entry)duration_add, + [C4M_BI_SUB] = (c4m_vtable_entry)c4m_duration_diff, + [C4M_BI_FINALIZER] = NULL, + }, +}; diff --git a/src/adts/ipaddr.c b/src/adts/ipaddr.c index 502db172..576fb890 100644 --- a/src/adts/ipaddr.c +++ b/src/adts/ipaddr.c @@ -1,14 +1,7 @@ #include "con4m.h" -// I realize some of this is redundant, but it's just easier. 
-typedef struct { - char addr[sizeof(struct sockaddr_in6)]; - uint16_t port; - int32_t af; -} ipaddr_t; - void -ipaddr_set_address(ipaddr_t *obj, c4m_str_t *s, uint16_t port) +c4m_ipaddr_set_address(c4m_ipaddr_t *obj, c4m_str_t *s, uint16_t port) { s = c4m_to_utf8(s); @@ -27,7 +20,7 @@ ipaddr_set_address(ipaddr_t *obj, c4m_str_t *s, uint16_t port) } static void -ipaddr_init(ipaddr_t *obj, va_list args) +ipaddr_init(c4m_ipaddr_t *obj, va_list args) { c4m_str_t *address = NULL; int32_t port = -1; @@ -49,14 +42,17 @@ ipaddr_init(ipaddr_t *obj, va_list args) if (port < 0 || port > 0xffff) { C4M_CRAISE("Invalid port for IP address."); } - ipaddr_set_address(obj, address, (uint16_t)port); + c4m_ipaddr_set_address(obj, address, (uint16_t)port); } } // TODO: currently this isn't at all portable across platforms. // Too quick and dirty. static void -ipaddr_marshal(ipaddr_t *obj, c4m_stream_t *s, c4m_dict_t *memos, int64_t *mid) +ipaddr_marshal(c4m_ipaddr_t *obj, + c4m_stream_t *s, + c4m_dict_t *memos, + int64_t *mid) { c4m_marshal_u32(sizeof(struct sockaddr_in6), s); c4m_stream_raw_write(s, sizeof(struct sockaddr_in6), obj->addr); @@ -65,7 +61,7 @@ ipaddr_marshal(ipaddr_t *obj, c4m_stream_t *s, c4m_dict_t *memos, int64_t *mid) } static void -ipaddr_unmarshal(ipaddr_t *obj, c4m_stream_t *s, c4m_dict_t *memos) +ipaddr_unmarshal(c4m_ipaddr_t *obj, c4m_stream_t *s, c4m_dict_t *memos) { uint32_t struct_sz = c4m_unmarshal_u32(s); @@ -79,7 +75,7 @@ ipaddr_unmarshal(ipaddr_t *obj, c4m_stream_t *s, c4m_dict_t *memos) } static c4m_str_t * -ipaddr_repr(ipaddr_t *obj) +ipaddr_repr(c4m_ipaddr_t *obj) { char buf[INET6_ADDRSTRLEN + 1] = { 0, @@ -98,16 +94,38 @@ ipaddr_repr(ipaddr_t *obj) c4m_str_from_int((int64_t)obj->port))); } +static c4m_ipaddr_t * +ipaddr_lit(c4m_utf8_t *s_u8, + c4m_lit_syntax_t st, + c4m_utf8_t *litmod, + c4m_compile_error_t *err) +{ + c4m_ipaddr_t *result = c4m_new(c4m_type_ip()); + + if (inet_pton(AF_INET, s_u8->data, result) == 1) { + return result; + } + + if 
(inet_pton(AF_INET6, s_u8->data, result) == 1) { + return result; + } + + *err = c4m_err_invalid_ip; + + return NULL; +} + const c4m_vtable_t c4m_ipaddr_vtable = { .num_entries = C4M_BI_NUM_FUNCS, .methods = { - [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)ipaddr_init, - [C4M_BI_TO_STR] = (c4m_vtable_entry)ipaddr_repr, - [C4M_BI_MARSHAL] = (c4m_vtable_entry)ipaddr_marshal, - [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)ipaddr_unmarshal, - [C4M_BI_GC_MAP] = (c4m_vtable_entry)C4M_GC_SCAN_NONE, + [C4M_BI_CONSTRUCTOR] = (c4m_vtable_entry)ipaddr_init, + [C4M_BI_TO_STR] = (c4m_vtable_entry)ipaddr_repr, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)ipaddr_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)ipaddr_unmarshal, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)C4M_GC_SCAN_NONE, + [C4M_BI_FROM_LITERAL] = (c4m_vtable_entry)ipaddr_lit, // Explicit because some compilers don't seem to always properly // zero it (Was sometimes crashing on a `c4m_stream_t` on my mac). - [C4M_BI_FINALIZER] = NULL, + [C4M_BI_FINALIZER] = NULL, }, }; diff --git a/src/adts/list.c b/src/adts/list.c index 3fa109c4..5f7c37f7 100644 --- a/src/adts/list.c +++ b/src/adts/list.c @@ -23,7 +23,7 @@ c4m_list_init(c4m_list_t *list, va_list args) } } -static inline void +void c4m_list_resize(c4m_list_t *list, size_t len) { if (!list->dont_acquire) { diff --git a/src/adts/numbers.c b/src/adts/numbers.c index b25458b2..eb40054f 100644 --- a/src/adts/numbers.c +++ b/src/adts/numbers.c @@ -72,7 +72,7 @@ unsigned_repr(int64_t item) } __uint128_t -raw_int_parse(c4m_utf8_t *u8, c4m_compile_error_t *err, bool *neg) +c4m_raw_int_parse(c4m_utf8_t *u8, c4m_compile_error_t *err, bool *neg) { __uint128_t cur = 0; __uint128_t last = 0; @@ -180,7 +180,7 @@ raw_hex_parse(c4m_utf8_t *u8, c4m_compile_error_t *err) \ switch (st) { \ case ST_Base10: \ - val = raw_int_parse(s, code, &neg); \ + val = c4m_raw_int_parse(s, code, &neg); \ break; \ case ST_1Quote: \ C4M_CRAISE("Single quoted not reimplemented yet.\n"); \ @@ -383,6 +383,19 @@ 
any_number_can_coerce_to(c4m_type_t *my_type, c4m_type_t *target_type) case C4M_T_F32: case C4M_T_F64: return true; + case C4M_T_SIZE: + switch (c4m_type_get_data_type_info(my_type)->typeid) { + case C4M_T_I8: + case C4M_T_BYTE: + case C4M_T_I32: + case C4M_T_CHAR: + case C4M_T_U32: + case C4M_T_INT: + case C4M_T_UINT: + return true; + default: + return false; + } default: return false; } @@ -403,6 +416,8 @@ any_int_coerce_to(const int64_t data, c4m_type_t *target_type) case C4M_T_INT: case C4M_T_UINT: return (void *)data; + case C4M_T_SIZE: + return c4m_new(c4m_type_size(), c4m_kw("bytes", c4m_ka(data))); case C4M_T_F32: case C4M_T_F64: d = (double)(data); diff --git a/src/adts/size.c b/src/adts/size.c new file mode 100644 index 00000000..00717af7 --- /dev/null +++ b/src/adts/size.c @@ -0,0 +1,467 @@ +#include "con4m.h" + +#define C4M_SZ_KB 1000ULL +#define C4M_SZ_KI (1ULL << 10) +#define C4M_SZ_MB 1000000ULL +#define C4M_SZ_MI (1ULL << 20) +#define C4M_SZ_GB 1000000000ULL +#define C4M_SZ_GI (1ULL << 30) +#define C4M_SZ_TB 1000000000000ULL +#define C4M_SZ_TI (1ULL << 40) +#define C4M_MAX_UINT (~0ULL) + +static bool +parse_size_lit(c4m_utf8_t *to_parse, c4m_size_t *result, bool *oflow) +{ + __int128_t n_bytes = 0; + __int128_t cur; + + if (oflow) { + *oflow = false; + } + + to_parse = c4m_to_utf8(c4m_str_lower(to_parse)); + + int l = c4m_str_byte_len(to_parse); + char *p = to_parse->data; + char *e = p + l; + char c; + uint64_t multiplier; + + if (!l) { + return false; + } + + while (p < e) { + c = *p; + + if (!isdigit(c)) { + return false; + } + + cur = 0; + + while (isdigit(c)) { + cur *= 10; + cur += (c - '0'); + + if (cur > (__int128_t)C4M_MAX_UINT) { + if (oflow) { + *oflow = true; + } + return false; + } + if (p == e) { + return false; + } + c = *++p; + } + + while (c == ' ' || c == ',') { + if (p == e) { + return false; + } + c = *++p; + } + + switch (c) { + case 'b': + p++; + if (p != e) { + c = *p; + if (c == 'y') { + p++; + if (p != e) { + c = *p; + if (c 
== 't') { + p++; + if (p != e) { + c = *p; + if (c == 'e') { + p++; + if (p != e) { + c = *p; + if (c == 's') { + p++; + } + } + } + } + } + } + } + } + + multiplier = 1; + break; + case 'k': + // Default to the decimal unit so a bare suffix at end of input still sets multiplier. + multiplier = C4M_SZ_KB; + p++; + if (p != e) { + c = *p; + if (c == 'i') { + p++; + multiplier = C4M_SZ_KI; + } + else { + multiplier = C4M_SZ_KB; + } + + if (p != e) { + c = *p; + if (c == 'b') { + p++; + } + } + } + break; + case 'm': + multiplier = C4M_SZ_MB; + p++; + if (p != e) { + c = *p; + if (c == 'i') { + p++; + multiplier = C4M_SZ_MI; + } + else { + multiplier = C4M_SZ_MB; + } + + if (p != e) { + c = *p; + if (c == 'b') { + p++; + } + } + } + break; + case 'g': + multiplier = C4M_SZ_GB; + p++; + if (p != e) { + c = *p; + if (c == 'i') { + p++; + multiplier = C4M_SZ_GI; + } + else { + multiplier = C4M_SZ_GB; + } + + if (p != e) { + c = *p; + if (c == 'b') { + p++; + } + } + } + break; + case 't': + multiplier = C4M_SZ_TB; + p++; + if (p != e) { + c = *p; + if (c == 'i') { + p++; + multiplier = C4M_SZ_TI; + } + else { + multiplier = C4M_SZ_TB; + } + + if (p != e) { + c = *p; + if (c == 'b') { + p++; + } + } + } + break; + default: + return false; + } + n_bytes += multiplier * cur; + + if (n_bytes > (__int128_t)C4M_MAX_UINT) { + if (oflow) { + *oflow = true; + } + return false; + } + while (p < e) { + c = *p; + if (c != ' ' && c != ',') { + break; + } + p++; + } + } + + uint64_t cast = (uint64_t)n_bytes; + *result = cast; + + return true; +} + +static void +size_init(c4m_size_t *self, va_list args) +{ + c4m_utf8_t *to_parse = NULL; + bool oflow = false; + uint64_t bytes = 0; + + c4m_karg_va_init(args); + c4m_kw_ptr("to_parse", to_parse); + c4m_kw_uint64("bytes", bytes); + + if (to_parse != NULL) { + to_parse = c4m_to_utf8(to_parse); + + if (!parse_size_lit(to_parse, self, &oflow)) { + if (oflow) { + C4M_CRAISE("Size literal value is too large."); + } + else { + C4M_CRAISE("Invalid size literal."); + } + } + return; + } + *self = bytes; +} + +static void +size_marshal(c4m_size_t *self, c4m_stream_t *s, c4m_dict_t *m, int64_t mid) +{ + c4m_marshal_u64(*self, s); +} + +static void 
+size_unmarshal(c4m_size_t *self, c4m_stream_t *s, c4m_dict_t *memos) +{ + *self = c4m_unmarshal_u64(s); +} + +static c4m_str_t * +size_repr(c4m_size_t *self) +{ + // We produce both power of 2 and power of 10, and then return + // the shorter of the 2. + + uint64_t n = *self; + c4m_utf8_t *p10 = c4m_new_utf8(""); + c4m_utf8_t *p2; + uint64_t tmp; + + if (!n) { + return c4m_new_utf8("0 Bytes"); + } + + if (n >= C4M_SZ_TB) { + tmp = n / C4M_SZ_TB; + p10 = c4m_cstr_format("{} Tb ", c4m_box_u64(tmp)); + tmp *= C4M_SZ_TB; + n -= tmp; + } + if (n >= C4M_SZ_GB) { + tmp = n / C4M_SZ_GB; + p10 = c4m_cstr_format("{}{} Gb ", p10, c4m_box_u64(tmp)); + tmp *= C4M_SZ_GB; + n -= tmp; + } + if (n >= C4M_SZ_MB) { + tmp = n / C4M_SZ_MB; + p10 = c4m_cstr_format("{}{} Mb ", p10, c4m_box_u64(tmp)); + tmp *= C4M_SZ_MB; + n -= tmp; + } + if (n >= C4M_SZ_KB) { + tmp = n / C4M_SZ_KB; + p10 = c4m_cstr_format("{}{} Kb ", p10, c4m_box_u64(tmp)); + tmp *= C4M_SZ_KB; + n -= tmp; + } + + if (n != 0) { + p10 = c4m_cstr_format("{}{} Bytes", p10, c4m_box_u64(n)); + } + else { + p10 = c4m_to_utf8(c4m_str_strip(p10)); + } + + n = *self; + + if (n < 1024) { + return p10; + } + + p2 = c4m_new_utf8(""); + + if (n >= C4M_SZ_TI) { + tmp = n / C4M_SZ_TI; + p2 = c4m_cstr_format("{} TiB ", c4m_box_u64(tmp)); + tmp *= C4M_SZ_TI; + n -= tmp; + } + if (n >= C4M_SZ_GI) { + tmp = n / C4M_SZ_GI; + p2 = c4m_cstr_format("{}{} GiB ", p2, c4m_box_u64(tmp)); + tmp *= C4M_SZ_GI; + n -= tmp; + } + if (n >= C4M_SZ_MI) { + tmp = n / C4M_SZ_MI; + p2 = c4m_cstr_format("{}{} MiB ", p2, c4m_box_u64(tmp)); + tmp *= C4M_SZ_MI; + n -= tmp; + } + if (n >= C4M_SZ_KI) { + tmp = n / C4M_SZ_KI; + p2 = c4m_cstr_format("{}{} KiB ", p2, c4m_box_u64(tmp)); + tmp *= C4M_SZ_KI; + n -= tmp; + } + + if (n != 0) { + p2 = c4m_cstr_format("{}{} Bytes", p2, c4m_box_u64(n)); + } + else { + p2 = c4m_to_utf8(c4m_str_strip(p2)); + } + + if (c4m_str_codepoint_len(p10) < c4m_str_codepoint_len(p2)) { + return p10; + } + + return p2; +} + +static 
c4m_size_t * +size_lit(c4m_utf8_t *s, + c4m_lit_syntax_t st, + c4m_utf8_t *mod, + c4m_compile_error_t *err) +{ + c4m_size_t *result = c4m_new(c4m_type_size()); + bool overflow = false; + + if (st == ST_Base10) { + __uint128_t v; + bool neg; + v = c4m_raw_int_parse(s, err, &neg); + + if (neg) { + *err = c4m_err_invalid_size_lit; + return NULL; + } + + if (*err != c4m_err_no_error) { + return NULL; + } + + if (v > (__uint128_t)C4M_MAX_UINT) { + *err = c4m_err_parse_lit_overflow; + return NULL; + } + *result = (uint64_t)v; + + return result; + } + + if (!parse_size_lit(s, result, &overflow)) { + if (overflow) { + *err = c4m_err_parse_lit_overflow; + return NULL; + } + else { + *err = c4m_err_invalid_size_lit; + return NULL; + } + } + + return result; +} + +static bool +size_eq(c4m_size_t *r1, c4m_size_t *r2) +{ + return *r1 == *r2; +} + +static bool +size_lt(c4m_size_t *r1, c4m_size_t *r2) +{ + return *r1 < *r2; +} + +static bool +size_gt(c4m_size_t *r1, c4m_size_t *r2) +{ + return *r1 > *r2; +} + +static c4m_size_t * +size_add(c4m_size_t *s1, c4m_size_t *s2) +{ + c4m_size_t *result = c4m_new(c4m_type_size()); + + *result = *s1 + *s2; + + return result; +} + +static c4m_size_t * +size_diff(c4m_size_t *s1, c4m_size_t *s2) +{ + c4m_size_t *result = c4m_new(c4m_type_size()); + + if (*s1 > *s2) { + *result = *s1 - *s2; + } + else { + *result = *s2 - *s1; + } + + return result; +} + +static bool +size_can_coerce_to(c4m_type_t *me, c4m_type_t *them) +{ + switch (c4m_type_get_data_type_info(them)->typeid) { + case C4M_T_INT: + case C4M_T_UINT: + case C4M_T_SIZE: + return true; + default: + return false; + } +} + +static void * +size_coerce_to(c4m_size_t *self, c4m_type_t *target_type) +{ + switch (c4m_type_get_data_type_info(target_type)->typeid) { + case C4M_T_INT: + case C4M_T_UINT: + return (void *)*self; + default: + return self; + } +} + +const c4m_vtable_t c4m_size_vtable = { + .num_entries = C4M_BI_NUM_FUNCS, + .methods = { + [C4M_BI_CONSTRUCTOR] = 
(c4m_vtable_entry)size_init, + [C4M_BI_REPR] = (c4m_vtable_entry)size_repr, + [C4M_BI_MARSHAL] = (c4m_vtable_entry)size_marshal, + [C4M_BI_UNMARSHAL] = (c4m_vtable_entry)size_unmarshal, + [C4M_BI_FROM_LITERAL] = (c4m_vtable_entry)size_lit, + [C4M_BI_EQ] = (c4m_vtable_entry)size_eq, + [C4M_BI_LT] = (c4m_vtable_entry)size_lt, + [C4M_BI_GT] = (c4m_vtable_entry)size_gt, + [C4M_BI_GC_MAP] = (c4m_vtable_entry)c4m_header_gc_bits, + [C4M_BI_ADD] = (c4m_vtable_entry)size_add, + [C4M_BI_SUB] = (c4m_vtable_entry)size_diff, + [C4M_BI_COERCIBLE] = (c4m_vtable_entry)size_can_coerce_to, + [C4M_BI_COERCE] = (c4m_vtable_entry)size_coerce_to, + [C4M_BI_FINALIZER] = NULL, + }, +}; diff --git a/src/adts/string.c b/src/adts/string.c index 33bde20a..404cde2b 100644 --- a/src/adts/string.c +++ b/src/adts/string.c @@ -1272,7 +1272,7 @@ c4m_str_fsplit(c4m_str_t *str, c4m_str_t *sub) } c4m_list_t * -c4m_str_xsplit(c4m_str_t *str, c4m_str_t *sub) +c4m_str_split(c4m_str_t *str, c4m_str_t *sub) { str = c4m_to_utf32(str); sub = c4m_to_utf32(sub); @@ -1459,6 +1459,14 @@ c4m_str_lit(c4m_utf8_t *s_u8, return c4m_rich_lit(s); } + if (!strcmp(litmod, "url")) { + if (!c4m_validate_url(s_u8)) { + *err = c4m_err_malformed_url; + return NULL; + } + return s_u8; + } + if (c4m_str_codepoint_len(lit_u8) != 0) { *err = c4m_err_parse_no_lit_mod_match; return NULL; diff --git a/src/compiler/check_pass.c b/src/compiler/check_pass.c index bb1611a2..683cea44 100644 --- a/src/compiler/check_pass.c +++ b/src/compiler/check_pass.c @@ -28,6 +28,7 @@ typedef struct { bool augmented_assignment; __uint128_t du_stack; int du_stack_ix; + c4m_list_t *simple_lits_wo_mod; } pass2_ctx; static void base_check_pass_dispatch(pass2_ctx *); @@ -646,7 +647,7 @@ sym_lookup(pass2_ctx *ctx, c4m_utf8_t *name) // even if it's not in the symbol table. 
if (spec != NULL) { - c4m_list_t *parts = c4m_str_xsplit(name, dot); + c4m_list_t *parts = c4m_str_split(name, dot); c4m_attr_info_t *attr_info = c4m_get_attr_info(spec, parts); switch (attr_info->kind) { @@ -2078,21 +2079,61 @@ handle_identifier(pass2_ctx *ctx) set_node_type(ctx, ctx->node, sym->type); } +static inline bool +should_defer(pass2_ctx *ctx, c4m_utf8_t *litmod) +{ + if (litmod && c4m_str_codepoint_len(litmod)) { + return false; + } + + c4m_tree_node_t *t = ctx->node->parent; + c4m_pnode_t *p = c4m_get_pnode(t); + + if (p->kind != c4m_nt_expression) { + return false; + } + + t = t->parent; + + if (!t) { + return false; + } + + p = c4m_get_pnode(t); + + // Only do it for simple assignment. + return p->kind == c4m_nt_assign; +} + static void check_literal(pass2_ctx *ctx) { // Right now, we don't try to fold sub-items. c4m_pnode_t *pnode = c4m_get_pnode(ctx->node); - c4m_str_t *litmod = pnode->extra_info; + c4m_str_t *litmod = pnode->token->literal_modifier; if (litmod != NULL && litmod->data) { - litmod = c4m_to_utf8(litmod); + litmod = c4m_to_utf8(litmod); + pnode->extra_info = litmod; } switch (pnode->kind) { case c4m_nt_simple_lit: pnode->value = c4m_node_simp_literal(ctx->node); - pnode->type = c4m_get_my_type(pnode->value); + + // If there's no litmod, we want to defer adding the type until + // if the type is concrete by the end of the pass, and the + // type of the parsed object is not aligned, then we will + // re-parse the type. 
+ + if (should_defer(ctx, litmod)) { + c4m_list_append(ctx->simple_lits_wo_mod, ctx->node); + pnode->type = c4m_new_typevar(); + } + else { + pnode->type = c4m_get_my_type(pnode->value); + } + break; case c4m_nt_lit_callback: pnode->value = c4m_node_to_callback(ctx->module_ctx, ctx->node); @@ -2980,6 +3021,31 @@ perform_index_rechecks(pass2_ctx *ctx) } } +static void +process_deferred_lits(pass2_ctx *ctx) +{ + int n = c4m_list_len(ctx->simple_lits_wo_mod); + + for (int i = 0; i < n; i++) { + c4m_tree_node_t *t = c4m_list_get(ctx->simple_lits_wo_mod, i, NULL); + c4m_pnode_t *p = c4m_get_pnode(t); + c4m_obj_t lit = p->value; + c4m_token_t *tok = p->token; + c4m_type_t *type = merge_ignore_err(p->type, + c4m_get_my_type(lit)); + + if (!c4m_type_is_error(type)) { + continue; + } + if (c4m_type_is_concrete(p->type) && c4m_fix_litmod(tok, p)) { + continue; + } + + // This already failed; generate the error though. + merge_or_err(ctx, p->type, c4m_get_my_type(lit)); + } +} + static c4m_list_t * module_check_pass(c4m_compile_ctx *cctx, c4m_module_compile_ctx *module_ctx) { @@ -2989,27 +3055,28 @@ module_check_pass(c4m_compile_ctx *cctx, c4m_module_compile_ctx *module_ctx) } pass2_ctx ctx = { - .attr_scope = cctx->final_attrs, - .global_scope = cctx->final_globals, - .spec = cctx->final_spec, - .compile = cctx, - .module_ctx = module_ctx, - .du_stack = 0, - .du_stack_ix = 0, - .loop_stack = c4m_list(c4m_type_ref()), - .deferred_calls = c4m_list(c4m_type_ref()), - .index_rechecks = c4m_list(c4m_type_ref()), - + .attr_scope = cctx->final_attrs, + .global_scope = cctx->final_globals, + .spec = cctx->final_spec, + .compile = cctx, + .module_ctx = module_ctx, + .du_stack = 0, + .du_stack_ix = 0, + .loop_stack = c4m_list(c4m_type_ref()), + .deferred_calls = c4m_list(c4m_type_ref()), + .index_rechecks = c4m_list(c4m_type_tree(c4m_type_parse_node())), + .simple_lits_wo_mod = c4m_list(c4m_type_tree(c4m_type_parse_node())), }; #ifdef C4M_DEV - module_ctx->print_nodes = 
c4m_list(c4m_type_ref()); + module_ctx->print_nodes = c4m_list(c4m_type_tree(c4m_type_parse_node())); #endif check_module_toplevel(&ctx); process_function_definitions(&ctx); perform_index_rechecks(&ctx); validate_module_variables(module_ctx); + process_deferred_lits(&ctx); return ctx.deferred_calls; } diff --git a/src/compiler/errors.c b/src/compiler/errors.c index 9fda991a..94720d86 100644 --- a/src/compiler/errors.c +++ b/src/compiler/errors.c @@ -1190,12 +1190,49 @@ static error_info_t error_info[] = { "integer types.", false, }, + [c4m_err_invalid_ip] = { + c4m_err_invalid_ip, + "invalid_ip", + "Literal is not a valid IP address.", + false, + }, [c4m_err_last] = { c4m_err_last, "last", "If you see this error, the compiler writer messed up bad", false, }, + [c4m_err_invalid_dt_spec] = { + c4m_err_invalid_dt_spec, + "invalid_dt_spec", + "Invalid literal for type Datetime", + false, + }, + [c4m_err_invalid_date_spec] = { + c4m_err_invalid_date_spec, + "invalid_date_spec", + "Invalid literal for type Date", + false, + }, + [c4m_err_invalid_time_spec] = { + c4m_err_invalid_time_spec, + "invalid_time_spec", + "Invalid literal for type Time", + false, + }, + [c4m_err_invalid_size_lit] = { + c4m_err_invalid_size_lit, + "invalid_size_lit", + "Invalid literal for type Size", + false, + }, + [c4m_err_invalid_duration_lit] = { + c4m_err_invalid_duration_lit, + "invalid_duration_lit", + "Invalid literal for type Duration", + false, + }, + #ifdef C4M_DEV [c4m_err_void_print] = { c4m_err_void_print, diff --git a/src/compiler/lex.c b/src/compiler/lex.c index 8255b04d..60f54ca3 100644 --- a/src/compiler/lex.c +++ b/src/compiler/lex.c @@ -109,15 +109,15 @@ c4m_token_type_to_string(c4m_token_kind_t tk) typedef struct { c4m_module_compile_ctx *ctx; - c4m_codepoint_t *start; - c4m_codepoint_t *end; - c4m_codepoint_t *pos; - c4m_codepoint_t *line_start; - c4m_token_t *last_token; - size_t token_id; - size_t line_no; - size_t cur_tok_line_no; - size_t cur_tok_offset; + 
c4m_codepoint_t *start; + c4m_codepoint_t *end; + c4m_codepoint_t *pos; + c4m_codepoint_t *line_start; + c4m_token_t *last_token; + size_t token_id; + size_t line_no; + size_t cur_tok_line_no; + size_t cur_tok_offset; } lex_state_t; // These helpers definitely require us to keep names consistent internally. @@ -128,11 +128,11 @@ typedef struct { // - a lit modifier at the end; if there is, it copies it into the token. // - LEX_ERROR adds an error to the broader context object, and longjumps. #define TOK(kind) output_token(state, kind) -#define LITERAL_TOK(kind, amt) \ +#define LITERAL_TOK(kind, amt, syntax) \ output_token(state, kind); \ state->last_token->adjustment = amt; \ capture_lit_text(state->last_token); \ - handle_lit_mod(state) + handle_lit_mod(state, syntax) #define LEX_ERROR(code) \ fill_lex_error(state, code); \ C4M_CRAISE("Exception:" #code "\n") @@ -234,8 +234,12 @@ skip_optional_newline(lex_state_t *state) } static inline void -handle_lit_mod(lex_state_t *state) +handle_lit_mod(lex_state_t *state, c4m_lit_syntax_t syntax) { + c4m_token_t *tok = state->last_token; + + tok->syntax = syntax; + if (peek(state) != '\'') { return; } @@ -249,7 +253,6 @@ handle_lit_mod(lex_state_t *state) size_t n = (size_t)(state->pos - lm_start); - c4m_token_t *tok = state->last_token; tok->literal_modifier = c4m_to_utf8( c4m_new(c4m_type_utf32(), c4m_kw("length", @@ -414,7 +417,7 @@ scan_int_or_float_literal(lex_state_t *state) int float_strlen = (int)(endp - u8->data); if (float_strlen > float_ix) { state->pos = state->start + float_strlen; - LITERAL_TOK(c4m_tt_float_lit, 0); + LITERAL_TOK(c4m_tt_float_lit, 0, ST_Float); state->last_token->literal_value = ((c4m_box_t)value).v; return; } @@ -455,7 +458,7 @@ scan_int_or_float_literal(lex_state_t *state) finished_int:; uint64_t n = (uint64_t)val; state->pos = state->start + i; - LITERAL_TOK(c4m_tt_int_lit, 0); + LITERAL_TOK(c4m_tt_int_lit, 0, ST_Base10); state->last_token->literal_value = (void *)n; return; } @@ -490,7 
+493,7 @@ scan_hex_literal(lex_state_t *state) advance(state); continue; default: - LITERAL_TOK(c4m_tt_hex_lit, 0); + LITERAL_TOK(c4m_tt_hex_lit, 0, ST_Hex); return; } } @@ -539,7 +542,7 @@ scan_tristring(lex_state_t *state) break; case '"': if (++quote_count == 3) { - LITERAL_TOK(c4m_tt_string_lit, 3); + LITERAL_TOK(c4m_tt_string_lit, 3, ST_2Quote); return; } continue; // breaking would reset quote count. @@ -593,7 +596,7 @@ scan_string_literal(lex_state_t *state) continue; case '"': finish_single_quote: - LITERAL_TOK(c4m_tt_string_lit, 1); + LITERAL_TOK(c4m_tt_string_lit, 1, ST_2Quote); return; default: continue; @@ -657,7 +660,7 @@ scan_char_literal(lex_state_t *state) } finish_up: - LITERAL_TOK(c4m_tt_char_lit, 1); + LITERAL_TOK(c4m_tt_char_lit, 1, ST_1Quote); return; } @@ -765,7 +768,7 @@ scan_id_or_keyword(lex_state_t *state) switch (r) { case c4m_tt_true: case c4m_tt_false: - LITERAL_TOK(r, 0); + LITERAL_TOK(r, 0, ST_Bool); return; case c4m_tt_float_lit: { c4m_utf32_t *u32 = c4m_new( @@ -778,7 +781,7 @@ scan_id_or_keyword(lex_state_t *state) c4m_utf8_t *u8 = c4m_to_utf8(u32); double value = strtod((char *)u8->data, NULL); - LITERAL_TOK(r, 0); + LITERAL_TOK(r, 0, ST_Float); state->last_token->literal_value = ((c4m_box_t)value).v; return; } @@ -1037,21 +1040,21 @@ lex(lex_state_t *state) skip_optional_newline(state); continue; case '}': - LITERAL_TOK(c4m_tt_rbrace, 0); + LITERAL_TOK(c4m_tt_rbrace, 0, ST_Dict); continue; case '[': TOK(c4m_tt_lbracket); skip_optional_newline(state); continue; case ']': - LITERAL_TOK(c4m_tt_rbracket, 0); + LITERAL_TOK(c4m_tt_rbracket, 0, ST_List); continue; case '(': TOK(c4m_tt_lparen); skip_optional_newline(state); continue; case ')': - LITERAL_TOK(c4m_tt_rparen, 0); + LITERAL_TOK(c4m_tt_rparen, 0, ST_Tuple); continue; case '&': if (peek(state) == '=') { diff --git a/src/core/gcbase.c b/src/core/gcbase.c index 790123e5..69618a10 100644 --- a/src/core/gcbase.c +++ b/src/core/gcbase.c @@ -759,3 +759,9 @@ 
c4m_alloc_from_arena(c4m_arena_t **arena_ptr, assert(raw != NULL); return (void *)(raw->data); } + +void +c4m_header_gc_bits(uint64_t *bitfield, c4m_base_obj_t *alloc) +{ + c4m_mark_obj_to_addr(bitfield, alloc, &alloc->concrete_type); +} diff --git a/src/core/init.c b/src/core/init.c index 5d435736..853b4805 100644 --- a/src/core/init.c +++ b/src/core/init.c @@ -6,30 +6,17 @@ char **c4m_stashed_argv; char **c4m_stashed_envp; -// A few builtins here; will break this out soon. -uint64_t -c4m_clz(uint64_t n) -{ - return __builtin_clzll(n); -} - -uint64_t -c4m_rand() -{ - return c4m_rand64(); -} - static void -c4m_register_builtins() +c4m_register_builtins(void) { c4m_add_static_function(c4m_new_utf8("c4m_clz"), c4m_clz); c4m_add_static_function(c4m_new_utf8("c4m_gc_remove_hold"), c4m_gc_remove_hold); - c4m_add_static_function(c4m_new_utf8("c4m_rand"), c4m_rand); + c4m_add_static_function(c4m_new_utf8("c4m_rand64"), c4m_rand64); } c4m_list_t * -c4m_get_program_arguments() +c4m_get_program_arguments(void) { c4m_list_t *result = c4m_list(c4m_type_utf8()); char **cur = c4m_stashed_argv + 1; // Skip argv0. 
@@ -43,7 +30,7 @@ c4m_get_program_arguments() } c4m_utf8_t * -c4m_get_argv0() +c4m_get_argv0(void) { return c4m_new_utf8(*c4m_stashed_argv); } @@ -106,7 +93,7 @@ c4m_get_env(c4m_utf8_t *name) } c4m_dict_t * -c4m_environment() +c4m_environment(void) { c4m_dict_t *result = c4m_new(c4m_type_dict(c4m_type_utf8(), c4m_type_utf8())); @@ -121,7 +108,7 @@ c4m_list_t *con4m_path = NULL; c4m_set_t *con4m_extensions = NULL; c4m_utf8_t * -c4m_con4m_root() +c4m_con4m_root(void) { if (con4m_root == NULL) { con4m_root = c4m_get_env(c4m_new_utf8("CON4M_ROOT")); @@ -148,7 +135,7 @@ c4m_con4m_root() } c4m_utf8_t * -c4m_system_module_path() +c4m_system_module_path(void) { c4m_list_t *l = c4m_list(c4m_type_utf8()); @@ -159,7 +146,7 @@ c4m_system_module_path() } static void -c4m_init_path() +c4m_init_path(void) { c4m_list_t *parts; @@ -275,6 +262,53 @@ _c4m_set_package_search_path(c4m_utf8_t *dir, ...) va_end(args); } +#ifdef C4M_STATIC_FFI_BINDING +#define FSTAT(x) c4m_add_static_function(c4m_new_utf8(#x), x) +#else +#define FSTAT(x) +#endif + +void +c4m_add_static_symbols(void) +{ + FSTAT(c4m_list_append); + FSTAT(c4m_wrapper_join); + FSTAT(c4m_str_upper); + FSTAT(c4m_str_lower); + FSTAT(c4m_str_split); + FSTAT(c4m_str_pad); + FSTAT(c4m_wrapper_hostname); + FSTAT(c4m_wrapper_os); + FSTAT(c4m_wrapper_arch); + FSTAT(c4m_wrapper_repr); + FSTAT(c4m_wrapper_to_str); + FSTAT(c4m_len); + FSTAT(c4m_snap_column); + FSTAT(c4m_now); + FSTAT(c4m_timestamp); + FSTAT(c4m_process_cpu); + FSTAT(c4m_thread_cpu); + FSTAT(c4m_uptime); + FSTAT(c4m_program_clock); + FSTAT(c4m_copy_object); + FSTAT(c4m_get_c_backtrace); + FSTAT(c4m_lookup_color); + FSTAT(c4m_to_vga); + FSTAT(c4m_read_utf8_file); + FSTAT(c4m_read_binary_file); + FSTAT(c4m_list_resize); + FSTAT(c4m_list_append); + FSTAT(c4m_list_sort); + FSTAT(c4m_list_pop); + FSTAT(c4m_list_contains); +} + +static void +c4m_initialize_library(void) +{ + c4m_init_program_timestamp(); + c4m_init_std_streams(); +} __attribute__((constructor)) void 
c4m_init(int argc, char **argv, char **envp) @@ -291,7 +325,7 @@ c4m_init(int argc, char **argv, char **envp) c4m_gc_register_root(&con4m_extensions, 1); c4m_gc_set_finalize_callback((void *)c4m_finalize_allocation); c4m_initialize_global_types(); - c4m_init_std_streams(); + c4m_initialize_library(); c4m_register_builtins(); c4m_init_path(); } diff --git a/src/core/literals.c b/src/core/literals.c index 14c01a66..4a4d3fe3 100644 --- a/src/core/literals.c +++ b/src/core/literals.c @@ -157,6 +157,8 @@ c4m_init_literal_handling() c4m_register_literal(ST_Base10, "char", C4M_T_CHAR); c4m_register_literal(ST_Base10, "f", C4M_T_F64); c4m_register_literal(ST_Base10, "f64", C4M_T_F64); + c4m_register_literal(ST_Base10, "sz", C4M_T_SIZE); + c4m_register_literal(ST_Base10, "size", C4M_T_SIZE); c4m_register_literal(ST_Hex, "", C4M_T_INT); c4m_register_literal(ST_Hex, "int", C4M_T_INT); c4m_register_literal(ST_Hex, "i64", C4M_T_INT); @@ -185,7 +187,7 @@ c4m_init_literal_handling() c4m_register_literal(ST_2Quote, "ip", C4M_T_IPV4); c4m_register_literal(ST_2Quote, "sz", C4M_T_SIZE); c4m_register_literal(ST_2Quote, "size", C4M_T_SIZE); - c4m_register_literal(ST_2Quote, "url", C4M_T_URL); + c4m_register_literal(ST_2Quote, "url", C4M_T_UTF8); c4m_register_literal(ST_1Quote, "", C4M_T_CHAR); c4m_register_literal(ST_1Quote, "c", C4M_T_CHAR); c4m_register_literal(ST_1Quote, "char", C4M_T_CHAR); @@ -198,18 +200,18 @@ c4m_init_literal_handling() c4m_register_literal(ST_List, "ol", C4M_T_GRID); c4m_register_literal(ST_List, "ul", C4M_T_GRID); c4m_register_literal(ST_List, "list", C4M_T_XLIST); - c4m_register_literal(ST_List, "f", C4M_T_FLIST); - c4m_register_literal(ST_List, "flist", C4M_T_FLIST); - c4m_register_literal(ST_List, "q", C4M_T_QUEUE); - c4m_register_literal(ST_List, "queue", C4M_T_QUEUE); + // c4m_register_literal(ST_List, "f", C4M_T_FLIST); + // c4m_register_literal(ST_List, "flist", C4M_T_FLIST); + // c4m_register_literal(ST_List, "q", C4M_T_QUEUE); + // 
c4m_register_literal(ST_List, "queue", C4M_T_QUEUE); c4m_register_literal(ST_List, "t", C4M_T_TREE); c4m_register_literal(ST_List, "tree", C4M_T_TREE); - c4m_register_literal(ST_List, "r", C4M_T_RING); - c4m_register_literal(ST_List, "ring", C4M_T_RING); - c4m_register_literal(ST_List, "log", C4M_T_LOGRING); - c4m_register_literal(ST_List, "logring", C4M_T_LOGRING); - c4m_register_literal(ST_List, "s", C4M_T_STACK); - c4m_register_literal(ST_List, "stack", C4M_T_STACK); + // c4m_register_literal(ST_List, "r", C4M_T_RING); + // c4m_register_literal(ST_List, "ring", C4M_T_RING); + // c4m_register_literal(ST_List, "log", C4M_T_LOGRING); + // c4m_register_literal(ST_List, "logring", C4M_T_LOGRING); + // c4m_register_literal(ST_List, "s", C4M_T_STACK); + // c4m_register_literal(ST_List, "stack", C4M_T_STACK); c4m_register_literal(ST_Dict, "", C4M_T_DICT); c4m_register_literal(ST_Dict, "d", C4M_T_DICT); c4m_register_literal(ST_Dict, "dict", C4M_T_DICT); @@ -287,6 +289,50 @@ c4m_parse_simple_lit(c4m_token_t *tok, c4m_lit_syntax_t *kptr, c4m_utf8_t **lm) return err; } +bool +c4m_fix_litmod(c4m_token_t *tok, c4m_pnode_t *pnode) +{ + // Precondition: pnode's type is concrete, simple, and no litmod + // was spec'd. + // + // Our goal is to pick the first litmod for the syntax that matches + // the type. 
+ + uint64_t n; + c4m_dict_t *d = mod_map[tok->syntax]; + c4m_type_t *t = c4m_type_resolve(pnode->type); + c4m_builtin_t base_type = c4m_type_get_base_tid(t); + hatrack_dict_item_t *items = hatrack_dict_items_sort(d, &n); + + for (unsigned int i = 0; i < n; i++) { + if (base_type == (c4m_builtin_t)items[i].value) { + c4m_utf8_t *lm = items[i].key; + c4m_vtable_t *vtbl; + c4m_literal_fn fn; + c4m_compile_error_t err = c4m_err_no_error; + + tok->literal_modifier = lm; + + vtbl = (c4m_vtable_t *)c4m_base_type_info[base_type].vtable; + fn = (c4m_literal_fn)vtbl->methods[C4M_BI_FROM_LITERAL]; + + tok->literal_value = (*fn)(tok->text, + tok->syntax, + tok->literal_modifier, + &err); + if (err != c4m_err_no_error) { + return false; + } + + pnode->value = tok->literal_value; + + return true; + } + } + + return false; +} + bool c4m_type_has_list_syntax(c4m_type_t *t) { diff --git a/src/core/object.c b/src/core/object.c index c81d88ba..62d9371c 100644 --- a/src/core/object.c +++ b/src/core/object.c @@ -181,15 +181,15 @@ const c4m_dt_info_t c4m_base_type_info[C4M_NUM_BUILTIN_DTS] = { .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_IPV4] = { - .name = "Ipaddr", + .name = "IPaddr", .typeid = C4M_T_IPV4, .vtable = &c4m_ipaddr_vtable, - .alloc_len = sizeof(struct sockaddr_in6), + .alloc_len = sizeof(c4m_ipaddr_t), .dt_kind = C4M_DT_KIND_primitive, .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_IPV6] = { - .name = "Ipv6_unused", // Going to merge w/ ipv4 + .name = "IPv6_unused", // Going to merge w/ ipv4 .typeid = C4M_T_IPV6, .vtable = &c4m_ipaddr_vtable, .alloc_len = sizeof(struct sockaddr_in6), @@ -197,34 +197,44 @@ const c4m_dt_info_t c4m_base_type_info[C4M_NUM_BUILTIN_DTS] = { .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_DURATION] = { - .name = "Duration", - .typeid = C4M_T_DURATION, - .dt_kind = C4M_DT_KIND_primitive, - .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, + .name = "Duration", + .typeid = C4M_T_DURATION, + .vtable = &c4m_duration_vtable, + .alloc_len = 
sizeof(struct timespec), + .dt_kind = C4M_DT_KIND_primitive, + .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_SIZE] = { - .name = "Size", - .typeid = C4M_T_SIZE, - .dt_kind = C4M_DT_KIND_primitive, - .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, + .name = "Size", + .typeid = C4M_T_SIZE, + .vtable = &c4m_size_vtable, + .alloc_len = sizeof(c4m_size_t), + .dt_kind = C4M_DT_KIND_primitive, + .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_DATETIME] = { - .name = "Datetime", - .typeid = C4M_T_DATETIME, - .dt_kind = C4M_DT_KIND_primitive, - .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, + .name = "Datetime", + .typeid = C4M_T_DATETIME, + .vtable = &c4m_datetime_vtable, + .alloc_len = sizeof(c4m_date_time_t), + .dt_kind = C4M_DT_KIND_primitive, + .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_DATE] = { - .name = "Date", - .typeid = C4M_T_DATE, - .dt_kind = C4M_DT_KIND_primitive, - .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, + .name = "Date", + .typeid = C4M_T_DATE, + .vtable = &c4m_date_vtable, + .alloc_len = sizeof(c4m_date_time_t), + .dt_kind = C4M_DT_KIND_primitive, + .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_TIME] = { - .name = "Time", - .typeid = C4M_T_TIME, - .dt_kind = C4M_DT_KIND_primitive, - .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, + .name = "Time", + .typeid = C4M_T_TIME, + .vtable = &c4m_time_vtable, + .alloc_len = sizeof(c4m_date_time_t), + .dt_kind = C4M_DT_KIND_primitive, + .hash_fn = HATRACK_DICT_KEY_TYPE_OBJ_PTR, }, [C4M_T_URL] = { .name = "Url", @@ -572,13 +582,7 @@ c4m_copy_object(c4m_obj_t obj) c4m_copy_fn ptr = (c4m_copy_fn)c4m_vtable(obj)->methods[C4M_BI_COPY]; if (ptr == NULL) { - c4m_utf8_t *err; - - err = c4m_cstr_format( - "Copying for '{}' objects is not " - "currently supported.", - c4m_get_my_type(obj)); - C4M_RAISE(err); + return obj; } return (*ptr)(obj); diff --git a/src/harness/con4m_base/test.c b/src/harness/con4m_base/test.c index 2266a077..acf0d910 100644 --- a/src/harness/con4m_base/test.c +++ 
b/src/harness/con4m_base/test.c @@ -21,29 +21,18 @@ bool c4m_definite_memcheck_error = false; #endif void -add_static_symbols() +add_static_test_symbols() { - c4m_add_static_function(c4m_new_utf8("strndup"), strndup); - c4m_add_static_function(c4m_new_utf8("c4m_list_append"), c4m_list_append); - c4m_add_static_function(c4m_new_utf8("c4m_join"), c4m_wrapper_join); - c4m_add_static_function(c4m_new_utf8("c4m_str_upper"), c4m_str_upper); - c4m_add_static_function(c4m_new_utf8("c4m_str_lower"), c4m_str_lower); - c4m_add_static_function(c4m_new_utf8("c4m_str_split"), c4m_str_xsplit); - c4m_add_static_function(c4m_new_utf8("c4m_str_pad"), c4m_str_pad); - c4m_add_static_function(c4m_new_utf8("c4m_hostname"), c4m_wrapper_hostname); - c4m_add_static_function(c4m_new_utf8("c4m_osname"), c4m_wrapper_os); - c4m_add_static_function(c4m_new_utf8("c4m_arch"), c4m_wrapper_arch); - c4m_add_static_function(c4m_new_utf8("c4m_repr"), c4m_wrapper_repr); - c4m_add_static_function(c4m_new_utf8("c4m_to_str"), c4m_wrapper_to_str); - c4m_add_static_function(c4m_new_utf8("c4m_len"), c4m_len); - c4m_add_static_function(c4m_new_utf8("c4m_snap_column"), c4m_snap_column); + c4m_add_static_symbols(); + c4m_add_static_function(c4m_new_utf8("strndup"), + strndup); } int main(int argc, char **argv, char **envp) { c4m_init(argc, argv, envp); - add_static_symbols(); + add_static_test_symbols(); c4m_install_default_styles(); c4m_terminal_dimensions(&c4m_term_width, NULL); diff --git a/src/io/file.c b/src/io/file.c index 3085ec3c..2e56b1ff 100644 --- a/src/io/file.c +++ b/src/io/file.c @@ -30,7 +30,7 @@ c4m_read_utf8_file(c4m_str_t *path) } c4m_buf_t * -c4m_binary_file(c4m_str_t *path) +c4m_read_binary_file(c4m_str_t *path) { c4m_buf_t *result = NULL; c4m_stream_t *stream = NULL; diff --git a/src/util/path.c b/src/util/path.c index 8cac6c1f..bbc8f036 100644 --- a/src/util/path.c +++ b/src/util/path.c @@ -121,23 +121,23 @@ raw_path_tilde_expand(c4m_utf8_t *in) } if (in->data[0] != '~') { - return 
c4m_str_xsplit(in, c4m_get_slash_const()); + return c4m_str_split(in, c4m_get_slash_const()); } - c4m_list_t *parts = c4m_str_xsplit(in, c4m_get_slash_const()); + c4m_list_t *parts = c4m_str_split(in, c4m_get_slash_const()); c4m_utf8_t *home = c4m_to_utf8(c4m_list_get(parts, 0, NULL)); if (c4m_str_codepoint_len(home) == 1) { c4m_list_set(parts, 0, c4m_empty_string()); - parts = c4m_list_plus(c4m_str_xsplit(c4m_get_user_dir(NULL), - c4m_get_slash_const()), + parts = c4m_list_plus(c4m_str_split(c4m_get_user_dir(NULL), + c4m_get_slash_const()), parts); } else { home->data++; c4m_list_set(parts, 0, c4m_empty_string()); - parts = c4m_list_plus(c4m_str_xsplit(c4m_get_user_dir(home), - c4m_get_slash_const()), + parts = c4m_list_plus(c4m_str_split(c4m_get_user_dir(home), + c4m_get_slash_const()), parts); home->data--; } @@ -165,11 +165,11 @@ c4m_resolve_path(c4m_utf8_t *s) return c4m_path_tilde_expand(s); case '/': return internal_normalize_and_join( - c4m_str_xsplit(s, c4m_get_slash_const())); + c4m_str_split(s, c4m_get_slash_const())); default: - parts = c4m_str_xsplit(c4m_get_current_directory(), - c4m_get_slash_const()); - c4m_list_plus_eq(parts, c4m_str_xsplit(s, c4m_get_slash_const())); + parts = c4m_str_split(c4m_get_current_directory(), + c4m_get_slash_const()); + c4m_list_plus_eq(parts, c4m_str_split(s, c4m_get_slash_const())); return internal_normalize_and_join(parts); } } diff --git a/src/util/wrappers.c b/src/util/wrappers.c index 591a0736..3ac01c86 100644 --- a/src/util/wrappers.c +++ b/src/util/wrappers.c @@ -23,7 +23,7 @@ c4m_wrapper_to_str(c4m_obj_t obj) } c4m_str_t * -c4m_wrapper_hostname() +c4m_wrapper_hostname(void) { struct utsname info; @@ -33,7 +33,7 @@ c4m_wrapper_hostname() } c4m_str_t * -c4m_wrapper_os() +c4m_wrapper_os(void) { struct utsname info; @@ -43,7 +43,7 @@ c4m_wrapper_os() } c4m_str_t * -c4m_wrapper_arch() +c4m_wrapper_arch(void) { struct utsname info; @@ -57,3 +57,9 @@ c4m_snap_column(c4m_grid_t *table, int64_t n) { 
c4m_set_column_style(table, n, "snap"); } + +uint64_t +c4m_clz(uint64_t n) +{ + return __builtin_clzll(n); +} diff --git a/sys/__init.c4m b/sys/__init.c4m index f722d972..e088612c 100644 --- a/sys/__init.c4m +++ b/sys/__init.c4m @@ -1 +1,6 @@ -use string \ No newline at end of file +use string +use time +use builtins +use random +use file +use list diff --git a/sys/builtins.c4m b/sys/builtins.c4m new file mode 100644 index 00000000..c6cfc2e7 --- /dev/null +++ b/sys/builtins.c4m @@ -0,0 +1,47 @@ +extern c4m_wrapper_hostname() -> ptr { + local: hostname() -> string +} + +extern c4m_wrapper_osname() -> ptr { + local: osname() -> string +} + +extern c4m_wrapper_arch() -> ptr { + local: arch() -> string +} + +extern c4m_len(ptr) -> i64 { + local: len(s: `x) -> int +} + +/* +extern c4m_marshal(ptr, ptr) -> void { + local: marshal(o: `x, s: stream) -> void +} + +extern c4m_unmarshal_to_type(ptr) -> ptr { + local: unmarshal(s: stream, t: typespec) -> `t +} + +extern c4m_wrapper_coerce(ptr, ptr) -> ptr { + local: coerce(o: `t, t: typespec) -> `v +} +*/ + +extern c4m_copy_object(ptr) -> ptr { + local: copy(o: `t) -> `t +} + +extern c4m_get_c_backtrace() -> ptr { + local: c_backtrace() -> grid +} + +# These should eventually move to a styling specific module + +extern c4m_lookup_color(ptr) -> i64 { + local: get_color_by_name(s: string) -> int +} + +extern c4m_to_vga(i64) -> i64 { + local: trucolor_to_vga(int) -> int +} \ No newline at end of file diff --git a/sys/file.c4m b/sys/file.c4m new file mode 100644 index 00000000..1e84fc3b --- /dev/null +++ b/sys/file.c4m @@ -0,0 +1,8 @@ +extern c4m_read_utf8_file(ptr) -> ptr { + local: read_file_utf8(string) -> string +} + +extern c4m_read_binary_file(ptr) -> ptr { + local: read_file_binary(string) -> buffer +} + diff --git a/sys/list.c4m b/sys/list.c4m new file mode 100644 index 00000000..1a1a62a9 --- /dev/null +++ b/sys/list.c4m @@ -0,0 +1,23 @@ +extern c4m_list_resize(ptr, u32) -> void { + local: set_size(l: list[`t], s: uint) -> 
void +} + +extern c4m_list_append(ptr, ptr) -> void { + local: append(l: list[`t], n: `t) -> void + box_values: false +} + +extern c4m_list_sort(ptr, ptr) -> void { + local: sort(l: list[`t], f: (`t, `t) -> i32) -> void +} +extern c4m_list_pop(ptr) -> ptr { + """ + This needs to convert to return a maybe[`t] + """ + local: pop(x: list[`t]) -> `t +} + +extern c4m_list_contains(ptr, ptr) -> bool { + local: contains(l: list[`t], item: `t) -> bool + box_values: false +} \ No newline at end of file diff --git a/sys/random.c4m b/sys/random.c4m new file mode 100644 index 00000000..4f998ce2 --- /dev/null +++ b/sys/random.c4m @@ -0,0 +1,3 @@ +extern c4m_rand64() -> i64 { + local: random() -> int +} diff --git a/sys/string.c4m b/sys/string.c4m index ae0a657e..c27f71fe 100644 --- a/sys/string.c4m +++ b/sys/string.c4m @@ -6,7 +6,7 @@ extern c4m_str_lower(ptr) -> ptr { local: lower(s: string) -> string } -extern c4m_join(ptr, ptr) -> ptr { +extern c4m_wrapper_join(ptr, ptr) -> ptr { local: join(l: list[string], s: string) -> string } @@ -17,3 +17,12 @@ extern c4m_str_split(ptr, ptr) -> ptr { extern c4m_str_pad(ptr, i64) -> ptr { local: pad(s1: string, n: int) -> string } + +extern c4m_wrapper_repr(ptr) -> ptr { + local: repr(s: `x) -> string +} + +extern c4m_wrapper_to_str(ptr) -> ptr { + local: str(s: `x) -> string +} + diff --git a/sys/time.c4m b/sys/time.c4m new file mode 100644 index 00000000..5a163be7 --- /dev/null +++ b/sys/time.c4m @@ -0,0 +1,18 @@ +extern c4m_now() -> ptr { + local: now() -> Duration +} +extern c4m_timestamp() -> ptr { + local: timestamp() -> Duration +} +extern c4m_process_cpu() -> ptr { + local: process_cpu() -> Duration +} +extern c4m_thread_cpu() -> ptr { + local: thread_cpu() -> Duration +} +extern c4m_uptime() -> ptr { + local: uptime() -> Duration +} +extern c4m_program_clock() -> ptr { + local: program_clock() -> Duration +} diff --git a/tests/basic17.c4m b/tests/basic17.c4m index e414aa4e..5fe6ef8d 100644 --- a/tests/basic17.c4m +++ 
b/tests/basic17.c4m @@ -5,9 +5,10 @@ test harness if 'output' key is not provided. Also tests FFI with 0 arguments. """ """ +$errors: """ -extern c4m_rand() -> i64 { - local: random() -> int +extern c4m_rand64() -> i64 { + local: r() -> int } -print(random()); \ No newline at end of file +print(r()) \ No newline at end of file diff --git a/tests/basic23.c4m b/tests/basic23.c4m index b38e315b..70a12250 100644 --- a/tests/basic23.c4m +++ b/tests/basic23.c4m @@ -1,16 +1,13 @@ """ Test extern list functions like append() that don't want a box when the type is concrete. + +The wrap now happens in the standard library. """ """ $output: 190 """ -extern c4m_list_append(ptr, ptr) -> void { - local: append(l: list[`x], item: `x) -> void - box_values: false -} - func f(n) { total = 0 l = [] diff --git a/tests/builtins.c4m b/tests/builtins.c4m index 6c280318..68acc968 100644 --- a/tests/builtins.c4m +++ b/tests/builtins.c4m @@ -8,30 +8,6 @@ Hello, world! 13 """ -extern c4m_hostname() -> ptr { - local: hostname() -> string -} - -extern c4m_osname() -> ptr { - local: osname() -> string -} - -extern c4m_arch() -> ptr { - local: arch() -> string -} - -extern c4m_repr(ptr) -> ptr { - local: repr(s: `x) -> string -} - -extern c4m_to_str(ptr) -> ptr { - local: str(s: `x) -> string -} - -extern c4m_len(ptr) -> i64 { - local: len(s: `x) -> int -} - x = "[h1]Hello, world!"'r print(repr(x)) print(str(x)) diff --git a/tests/dt.c4m b/tests/dt.c4m new file mode 100644 index 00000000..a1199910 --- /dev/null +++ b/tests/dt.c4m @@ -0,0 +1,22 @@ +""" +Date / time basics. + +Also, canonical repr probably should add TZ info when we have it. 
+""" +""" +$output: +1974-01-07 +12:31:00 +2024-07-22T18:01:25 +""" +var date : Date; +var time : Time; +var dt : Datetime; + +date = "Jan 7, 1974"; +time = "12:31pm"; +dt = "22 Jul 24 18:01:25 -0400"; + +print(date) +print(time) +print(dt) diff --git a/tests/duration.c4m b/tests/duration.c4m new file mode 100644 index 00000000..031cdcc8 --- /dev/null +++ b/tests/duration.c4m @@ -0,0 +1,19 @@ +""" +This has gone on too long. +""" +""" +$output: +2 days, 4 minutes, 1 second +1 day, 5 minutes +23 hours, 59 minutes, 1 second +""" +var d1: Duration + +d1 = "2 days, 4 mins 1s" +d2 = "1 day, 5 min"'duration + +print(d1) +print(d2) + +# The '-' here is a difference op, not subtraction. +print(d2 - d1) \ No newline at end of file diff --git a/tests/ip.c4m b/tests/ip.c4m new file mode 100644 index 00000000..cd7160c5 --- /dev/null +++ b/tests/ip.c4m @@ -0,0 +1,9 @@ +""" +Proof of life for IPs. +""" +""" +$output: +128.164.0.1 +""" +x = "128.164.0.1"'ip +print(x) diff --git a/tests/olitmod.c4m b/tests/olitmod.c4m new file mode 100644 index 00000000..91f1d3e3 --- /dev/null +++ b/tests/olitmod.c4m @@ -0,0 +1,16 @@ +""" +Ensure type checking is good. +""" +""" +$errors: +inconsistent_type +""" + +shouldwork = "2 gb"'sz + +var shouldnt: Duration + +shouldnt = "2 gb"'sz + +print(shouldwork) +print(shouldnt) diff --git a/tests/size.c4m b/tests/size.c4m new file mode 100644 index 00000000..23d43e5f --- /dev/null +++ b/tests/size.c4m @@ -0,0 +1,21 @@ +""" +Size matters. +""" +""" +$output: +10 MiB 3 Bytes +10 Mb 1 Bytes +1 Kb 23 Bytes +""" +s1 = "10485763 b"'sz +s2 = "10000001 bytes"'sz +s3 = "10000001bYtes"'sz + + +print(s1) +print(s2) + +assert(s2 == s3) + +# - for sizes is a difference op, not pure subtraction. +print(23'sz - 1046'sz) diff --git a/tests/url.c4m b/tests/url.c4m new file mode 100644 index 00000000..7bf901b5 --- /dev/null +++ b/tests/url.c4m @@ -0,0 +1,12 @@ +""" +Bad URL test. 
+""" +""" +$errors: +malformed_url +""" +good_url = "http://foo.bar.com/hello?x=2"'url +old_url = "ftp:///foo.bar.com/blah"'url +bad_url = "http:foo.bar.com"'url +print(good_url) +print(bad_url) \ No newline at end of file diff --git a/tests/zeta.c4m b/tests/zeta.c4m new file mode 100644 index 00000000..35ef0471 --- /dev/null +++ b/tests/zeta.c4m @@ -0,0 +1,12 @@ +print("[h1]Now (Time since the Unix Epoch)"'r) +print(now()) +print("[h1]Monotonic Timestamp (POSIX; no guaranteed start time)"'r) +print(timestamp()) +print("[h1]Process CPU"'r) +print(process_cpu()) +print("[h1]Thread CPU"'r) +print(thread_cpu()) +print("[h1]System Uptime (generally excludes sleep)"'r) +print(uptime()) +print("[h1]Program wall clock (all proceeding tests till now)"'r) +print(program_clock()) \ No newline at end of file diff --git a/todo/olitmod.c4m b/todo/olitmod.c4m deleted file mode 100644 index d3eed5c6..00000000 --- a/todo/olitmod.c4m +++ /dev/null @@ -1,6 +0,0 @@ -shouldwork = "2 gb"'sz - -var shouldnt: duration - -shouldnt = "2 gb"'sz - diff --git a/todo/olits.c4m b/todo/olits.c4m deleted file mode 100644 index 95673864..00000000 --- a/todo/olits.c4m +++ /dev/null @@ -1,2 +0,0 @@ -way1 = "2 gb"'sz -qwoi := 2 gb