Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update yaml parser to read in arrays and output them #1576

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions diag_manager/diag_yaml_format.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ diag_files:
### 2.1 Global Section
The diag_yaml requires the “title” and “base_date” keys.
- The **title** is a string that labels the diag yaml. The equivalent in the legacy diag_table would be the experiment. It is recommended that each diag_yaml have a separate title label that is descriptive of the experiment that is using it.
- The **basedate** is an array of 6 integers indicating the base_date in the format [year month day hour minute second].
- The **basedate** is an array of 6 integers indicating the base_date in the format [year, month, day, hour, minute, second].
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to make it clear that it is a comma-separated array to help contrast from the original space-separated format?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes good idea, added that with c496f38, along with a note that spacing is not significant.

That commit also updates the rest of the test scripts to use the new base date format since they were missed originally.


**Example:**

In the YAML format:
```yaml
title: ESM4_piControl
base_date: 2022 5 26 12 3 1
base_date: [2022, 5, 26, 12, 3, 1]
```

In the legacy ascii format:
Expand Down Expand Up @@ -281,7 +281,7 @@ The sub region can be listed under the sub_region section as a dashed array. The
Below is a complete example of diag_table.yaml:
```yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0]
diag_files:
- file_name: wild_card_name%4yr%2mo%2dy%2hr
freq: 6 hours
Expand Down
7 changes: 4 additions & 3 deletions diag_manager/fms_diag_yaml.F90
Original file line number Diff line number Diff line change
Expand Up @@ -1667,13 +1667,14 @@ subroutine fms_diag_yaml_out()
call fms_f2c_string( keys(1)%key2, 'base_date')
basedate_loc = diag_yaml%get_basedate()
tmpstr1 = ''; tmpstr2 = ''
tmpstr1 = string(basedate_loc(1))
tmpstr1 = '[ '//string(basedate_loc(1))
bensonr marked this conversation as resolved.
Show resolved Hide resolved
tmpstr2 = trim(tmpstr1)
do i=2, basedate_size
tmpstr1 = string(basedate_loc(i))
tmpstr2 = trim(tmpstr2) // ' ' // trim(tmpstr1)
tmpstr2 = trim(tmpstr2) // ', ' // trim(tmpstr1)
enddo
call fms_f2c_string(vals(1)%val2, trim(tmpstr2))
tmpstr1 = trim(tmpstr2) // ']'
call fms_f2c_string(vals(1)%val2, trim(tmpstr1))
call yaml_out_add_level2key('diag_files', keys(1))
key3_i = 0
!! tier 2 - diag files
Expand Down
61 changes: 55 additions & 6 deletions parser/yaml_output_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ void keyerror(yaml_event_t * event, yaml_emitter_t * emitter){
fprintf(stderr, "WARNING: YAML_OUTPUT: Failed to emit event %d: %s\n", event->type, emitter->problem);
fprintf(stdout, "WARNING: YAML_OUTPUT: Failed to emit event %d: %s\n", event->type, emitter->problem);
}
/* \breif Writes the key/value pairs of the fmsyamloutkeys and fmsyamloutvalues structs
/* \brief Writes the key/value pairs of the fmsyamloutkeys and fmsyamloutvalues structs
* \note If second value (val2) in struct starts with '[' it will be assumed the value is a yaml array
* There may be slight differences in spacing for array outputs vs what is read in.
* \param emitter The libyaml emitter for this file
 * \param event The libyaml event pointer
* \param aindex The index of keys and vals that are being written currently
Expand Down Expand Up @@ -124,11 +126,58 @@ void write_keys_vals_yaml (yaml_emitter_t * emitter, yaml_event_t * event , int
keyerror(event, emitter);
return;
}
yaml_scalar_event_initialize(event, NULL, (yaml_char_t *)YAML_STR_TAG,
(yaml_char_t *)vals[aindex].val2, strlen(vals[aindex].val2), 1, 0, YAML_PLAIN_SCALAR_STYLE);
if (!yaml_emitter_emit(emitter, event)){
keyerror(event, emitter);
return;

// check if we're writing an array
if(vals[aindex].val2[0] != '['){
yaml_scalar_event_initialize(event, NULL, NULL,
(yaml_char_t *)vals[aindex].val2, strlen(vals[aindex].val2), 1, 0, YAML_PLAIN_SCALAR_STYLE);
if (!yaml_emitter_emit(emitter, event)){
keyerror(event, emitter);
return;
}
} else {
// parse the string for individual elements
char *buff = vals[aindex].val2;
char cbuff[2]; // single char buffer for appending
cbuff[1] = '\0';
char elements[16][255];
int ecount=0; // count of elements
strcpy(elements[0], "");
for(int i=1; i< strlen(buff)-1; i++){
if(buff[i] != ' ' && buff[i] != ','){
cbuff[0] = buff[i];
strcat(elements[ecount], cbuff);
}
if(buff[i] == ','){
ecount++;
if(ecount >= 16) {
printf("Error: element count in write_keys_vals_yaml greater than max value of 16");
return;
}
strcpy(elements[ecount], "");
}
}
// start flow sequence
yaml_sequence_start_event_initialize(event, NULL, NULL, 1, YAML_FLOW_SEQUENCE_STYLE);
if (!yaml_emitter_emit(emitter, event)){
keyerror(event, emitter);
return;
}
// loop through and write elements
for (int i=0; i <= ecount; i++){
yaml_scalar_event_initialize(event, NULL, NULL,
(yaml_char_t *)elements[i], strlen(elements[i]), 1, 0, YAML_PLAIN_SCALAR_STYLE);
if (!yaml_emitter_emit(emitter, event)){
keyerror(event, emitter);
return;
}
}
// end flow sequence
yaml_sequence_end_event_initialize(event);
if (!yaml_emitter_emit(emitter, event)){
keyerror(event, emitter);
return;
}
}
}
if (keys[aindex].key3[0] !='\0') {
Expand Down
2 changes: 2 additions & 0 deletions parser/yaml_parser.F90
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ module yaml_parser_mod
interface

!> @brief Private c function that opens and parses a yaml file (see yaml_parser_binding.c)
!! Any arrays (formatted as [a, b, c] or a multiline '-' tabbed list) will be
!! read in as a space-separated list of characters
!! @return Flag indicating if the read was successful
function open_and_parse_file_wrap(filename, file_id) bind(c) &
result(error_code)
Expand Down
51 changes: 38 additions & 13 deletions parser/yaml_parser_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@ int open_and_parse_file_wrap(char *filename, int *file_id)
char current_layername[255]; /* Name of the current block */
int i; /* To minimize the typing :) */
int j; /* To minimize the typing :) */
bool is_flow_sequence = false;/* Flag indicating if within a flow sequence (aka a yaml array ie. [1, 2, 3, 4] or - tabbed list)*/
bool is_first = false; /* Flag indicating if current item is the first item in the flow sequence*/
char buff[255]; /* String buffer for individual array values*/

if (nfiles == 0 )
{
Expand Down Expand Up @@ -356,6 +359,19 @@ int open_and_parse_file_wrap(char *filename, int *file_id)
is_key = false;
break;
}
case YAML_FLOW_SEQUENCE_START_TOKEN:
{
is_key = false;
is_flow_sequence = true;
is_first = true;
break;
}
case YAML_FLOW_SEQUENCE_END_TOKEN:
{
is_key = false;
is_flow_sequence = false;
break;
}
case YAML_BLOCK_ENTRY_TOKEN:
{
layer = layer + 1;
Expand Down Expand Up @@ -386,19 +402,28 @@ int open_and_parse_file_wrap(char *filename, int *file_id)
{
if ( ! is_key)
{
current_parent = parent[layer];
strcpy(current_layername, "");
key_count = key_count + 1;
i = key_count;
my_files.files[j].keys = realloc(my_files.files[j].keys, (i+1)*sizeof(key_value_pairs));
my_files.files[j].keys[i].key_number=i;
my_files.files[j].keys[i].parent_key = current_parent;
strcpy(my_files.files[j].keys[i].parent_name, current_layername);
strcpy(my_files.files[j].keys[i].key, key_value);
strcpy(my_files.files[j].keys[i].value, token.data.scalar.value);
my_files.files[j].nkeys = key_count;
/* printf("----> LAYER:%i LAYER_NAME=%s PARENT:%i, KEYCOUNT:%i KEY: %s VALUE: %s \n", layer, current_layername, current_parent, key_count, key_value, token.data.scalar.value); */
strcpy(key_value,"");
if (! is_flow_sequence || is_first) // if either a normal value or the first element in a flow sequence
{
current_parent = parent[layer];
strcpy(current_layername, "");
key_count = key_count + 1;
i = key_count;
my_files.files[j].keys = realloc(my_files.files[j].keys, (i+1)*sizeof(key_value_pairs));
my_files.files[j].keys[i].key_number=i;
my_files.files[j].keys[i].parent_key = current_parent;
strcpy(my_files.files[j].keys[i].parent_name, current_layername);
strcpy(my_files.files[j].keys[i].key, key_value);
strcpy(my_files.files[j].keys[i].value, token.data.scalar.value);
my_files.files[j].nkeys = key_count;
/* printf("----> LAYER:%i LAYER_NAME=%s PARENT:%i, KEYCOUNT:%i KEY: %s VALUE: %s \n", layer, current_layername, current_parent, key_count, key_value, token.data.scalar.value); */
strcpy(key_value,"");
is_first = false;
} else { // if an item in a flow sequence
strcpy(buff, token.data.scalar.value);
strcat(my_files.files[j].keys[i].value, " ");
strcat(my_files.files[j].keys[i].value, buff);
}

}
else
{strcpy(key_value,token.data.scalar.value);}
Expand Down
14 changes: 7 additions & 7 deletions test_fms/diag_manager/test_diag_manager2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ if [ -z "${skipflag}" ]; then

cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [ 2, 1, 1, 0, 0, 0 ]
diag_files:
- file_name: wild_card_name%4yr%2mo%2dy%2hr
filename_time: end
Expand Down Expand Up @@ -592,7 +592,7 @@ my_test_count=`expr $my_test_count + 1`

cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0 ]
diag_files:
- file_name: wild_card_name%4yr%2mo%2dy%2hr
filename_time: end
Expand Down Expand Up @@ -673,7 +673,7 @@ _EOF
printf "&diag_manager_nml \n use_modern_diag = .true. \n/" | cat > input.nml
cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0 ]
diag_files:
- file_name: file1
freq: 6 hours
Expand Down Expand Up @@ -722,7 +722,7 @@ _EOF
printf "&diag_manager_nml \n use_modern_diag = .true. \n do_diag_field_log = .true. \n/" | cat > input.nml
cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0 ]

diag_files:
- file_name: static_file
Expand Down Expand Up @@ -886,7 +886,7 @@ _EOF
cat <<_EOF > diag_out_ref.yaml
---
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0]
diag_files:
- file_name: static_file
freq: -1
Expand Down Expand Up @@ -1296,7 +1296,7 @@ test_expect_success "check modern diag manager yaml output (test $my_test_count)
printf "&diag_manager_nml \n use_modern_diag = .true. \n use_clock_average = .true. \n /" | cat > input.nml
cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0 ]

diag_files:
- file_name: file1_clock
Expand All @@ -1318,7 +1318,7 @@ my_test_count=`expr $my_test_count + 1`
printf "&diag_manager_nml \n use_modern_diag = .true. \n use_clock_average = .false. \n /" | cat > input.nml
cat <<_EOF > diag_table.yaml
title: test_diag_manager
base_date: 2 1 1 0 0 0
base_date: [2, 1, 1, 0, 0, 0 ]

diag_files:
- file_name: file1_forecast
Expand Down
Loading