Skip to content

Commit

Permalink
Merge pull request #288 from simon987/dev
Browse files Browse the repository at this point in the history
v2.12.1
  • Loading branch information
simon987 authored Apr 23, 2022
2 parents a74726b + f87de89 commit 4e1109c
Show file tree
Hide file tree
Showing 21 changed files with 153 additions and 97 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`

1. See [Usage guide](docs/USAGE.md)

Expand Down
2 changes: 1 addition & 1 deletion docs/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the

# Elasticsearch

Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
Elasticsearch versions >=6.8.0, <9.0.0 (including 7.X.X and 8.X.X) are supported by sist2.

Using a version >=7.14.0 is recommended to enable the following features:

Expand Down
34 changes: 1 addition & 33 deletions schema/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "nGram",
"type": "ngram",
"min_gram": 3,
"max_gram": 3
}
Expand Down Expand Up @@ -55,37 +55,5 @@
]
}
}
},
"mappings": {
"dynamic_templates": [
{
"keyword_fields": {
"match_mapping_type": "string",
"match": "kw_*",
"mapping": {
"type": "keyword"
}
}
},
{
"integer_fields": {
"match_mapping_type": "*",
"match": "int_*",
"mapping": {
"type": "integer"
}
}
},
{
"meta_fields": {
"match_mapping_type": "*",
"match": "mt_*",
"mapping": {
"type": "keyword",
"index": false
}
}
}
]
}
}
1 change: 1 addition & 0 deletions scripts/before_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c

printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
8 changes: 8 additions & 0 deletions scripts/magic_static.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

"""Emit a C definition that embeds the libmagic database as a byte array.

The generated line is written to stdout; scripts/before_build.sh redirects it
into src/magic_generated.c.
"""

MAGIC_DATABASE_PATH = "/usr/lib/file/magic.mgc"


def format_magic_buffer(data):
    """Return the C definition of ``magic_database_buffer`` holding ``data``.

    Every byte is rendered as a decimal literal, comma-separated without
    spaces. Empty ``data`` yields a zero-length array with an empty
    initializer, exactly as the script has always printed it.
    """
    return "char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data))


def main():
    """Read the magic database (best effort) and print its C definition."""
    try:
        with open(MAGIC_DATABASE_PATH, "rb") as f:
            data = f.read()
    except OSError:
        # Best effort: a missing or unreadable database produces an empty
        # buffer instead of failing the build. Only I/O errors are swallowed,
        # so KeyboardInterrupt and SystemExit still propagate.
        data = bytes([])

    print(format_magic_buffer(data))


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions scripts/start_dev_es_6.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Start a disposable single-node Elasticsearch 6.8.0 container for development.
# Host port 9202 is mapped to the container's port 9200.
# NOTE: keep a space before each trailing backslash; without it the next
# line's first token is glued onto the previous argument.
docker run --rm -it --name "sist2-dev-es-6" \
    -p 9202:9200 -e "discovery.type=single-node" \
    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0
3 changes: 3 additions & 0 deletions scripts/start_dev_es_8.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Start a disposable single-node Elasticsearch 8.1.2 container for development.
# Host ports 9200 and 9300 are mapped to the same ports in the container.
# NOTE: keep a space before each trailing backslash; without it the next
# line's first token is glued onto the previous argument.
docker run --rm -it --name "sist2-dev-es" \
    -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2
2 changes: 2 additions & 0 deletions src/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {

LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
Expand Down Expand Up @@ -512,6 +513,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {

LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
Expand Down
3 changes: 3 additions & 0 deletions src/cli.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *script_path;
char *script;
Expand All @@ -68,6 +69,7 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *listen_address;
char *credentials;
char *tag_credentials;
Expand All @@ -85,6 +87,7 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *script_path;
int async_script;
Expand Down
2 changes: 2 additions & 0 deletions src/ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ typedef struct {

typedef struct {
char *es_url;
int es_insecure_ssl;
es_version_t *es_version;
char *es_index;
int batch_size;
Expand All @@ -97,6 +98,7 @@ typedef struct {
char *es_url;
es_version_t *es_version;
char *es_index;
int es_insecure_ssl;
int index_count;
char *auth_user;
char *auth_pass;
Expand Down
80 changes: 47 additions & 33 deletions src/index/elastic.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {

cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
// cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);

char *json = cJSON_PrintUnformatted(line);
Expand Down Expand Up @@ -119,7 +119,7 @@ void execute_update_script(const char *script, int async, const char index_id[SI
} else {
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(url, str);
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
Expand Down Expand Up @@ -150,7 +150,7 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON_Delete(resp);
}

void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;

Expand All @@ -171,11 +171,20 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);

if (legacy) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
} else {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
}

size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
Expand Down Expand Up @@ -263,11 +272,11 @@ void _elastic_flush(int max) {

size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));

char bulk_url[4096];
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf);
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);

if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
Expand Down Expand Up @@ -393,7 +402,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];

snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);

Expand All @@ -402,24 +411,24 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free(script);

snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}

snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);

snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}

es_version_t *elastic_get_version(const char *es_url) {
response_t *r = web_get(es_url, 30);
es_version_t *elastic_get_version(const char *es_url, int insecure) {
response_t *r = web_get(es_url, 30, insecure);

char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
Expand Down Expand Up @@ -464,7 +473,7 @@ es_version_t *elastic_get_version(const char *es_url) {

void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {

es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
IndexCtx.es_version = es_version;

if (es_version == NULL) {
Expand All @@ -473,33 +482,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s

LOG_INFOF("elastic.c",
"Elasticsearch version is %s (supported=%d, legacy=%d)",
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));

if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
}

char *settings = NULL;
if (USE_LEGACY_ES_SETTINGS(es_version)) {
settings = settings_json;
} else {
if (IS_LEGACY_VERSION(es_version)) {
settings = settings_legacy_json;
} else {
settings = settings_json;
}

// Check if index exists
char url[4096];
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
int index_exists = r->status_code == 200;
free_response(r);

if (!index_exists || force_reset) {
r = web_delete(url);
r = web_delete(url, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);

snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
r = web_put(url, "", IndexCtx.es_insecure_ssl);

if (r->status_code != 200) {
print_error(r);
Expand All @@ -510,26 +519,31 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);

snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);

snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);

snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings);
r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not update user settings")
}
free_response(r);

snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
if (IS_LEGACY_VERSION(es_version)) {
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}

r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
Expand All @@ -538,7 +552,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);

snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
Expand All @@ -548,7 +562,7 @@ cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);

response_t *r = web_get(url, 3);
response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
cJSON *json = NULL;
if (r->status_code == 200) {
char *tmp = malloc(r->size + 1);
Expand All @@ -566,7 +580,7 @@ char *elastic_get_status() {
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);

response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';
Expand Down
8 changes: 5 additions & 3 deletions src/index/elastic.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ typedef struct {
} es_version_t;

#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8))
#define USE_LEGACY_ES_SETTINGS(es_version) ((es_version) != NULL && (!VERSION_GE((es_version), 7, 14)))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))

#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))

__always_inline
static const char *format_es_version(es_version_t *version) {
Expand Down Expand Up @@ -57,7 +59,7 @@ cJSON *elastic_get_document(const char *id_str);

char *elastic_get_status();

es_version_t *elastic_get_version(const char *es_url);
es_version_t *elastic_get_version(const char *es_url, int insecure);

void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);

Expand Down
Loading

0 comments on commit 4e1109c

Please sign in to comment.