From 0dc4bfaf54b015d79cbd52a5d0d88f66bbdd79f2 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 17 Dec 2024 22:59:43 +0100 Subject: [PATCH 1/2] Try to warn about all fields having inconsistent settings when they are used in a field set Note that this is not done for match settings, as that will change behavior, but we should probably do that change, as it might be wrong now (e.g. you can get 'text' matching when the 2 fields in a fieldset have matching 'exact' and 'word') --- .../schema/processing/FieldSetSettings.java | 22 ++++--- .../processing/FieldSetSettingsTestCase.java | 57 ++++++++++++++++++- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java index 95866500918..f1937255273 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java @@ -48,11 +48,18 @@ public void process(boolean validate, boolean documentsOnly) { } private void checkFieldNames(Schema schema, FieldSet fieldSet) { - for (String field : fieldSet.getFieldNames()) { - if (schema.getField(field) == null) - throw new IllegalArgumentException("For " + schema + ": Field '" + field + "' in " + - fieldSet + " does not exist."); - } + var invalidFieldNames = fieldSet.getFieldNames().stream() + .filter(f -> schema.getField(f) == null) + .map(f -> "'" + f + "'") + .toList(); + if (invalidFieldNames.isEmpty()) return; + + var message = "For " + schema + ": "; + if (invalidFieldNames.size() == 1) + message = message + "Field " + invalidFieldNames.get(0) + " in " + fieldSet + " does not exist."; + else + message = message + "Fields " + String.join(",", invalidFieldNames) + " in " + fieldSet + " do not exist."; + throw new IllegalArgumentException(message); } private void checkMatching(Schema schema, FieldSet fieldSet) { @@ -67,7 +74,10 @@ private void checkMatching(Schema schema, FieldSet fieldSet) { warn(schema, field.asField(), "The matching settings for the fields in " + fieldSet + " are inconsistent " + "(explicitly or because of field type). This may lead to recall and ranking issues. " + + "The matching setting that will be used for this fieldset is " + matching.getType() + ". " + "See " + fieldSetDocUrl); + // TODO: Remove (see FieldSetSettingsTestCase#inconsistentMatchingShouldStillSetMatchingForFieldSet) + // but when doing so matching for a fieldset might change return; } } @@ -88,7 +98,6 @@ private void checkNormalization(Schema schema, FieldSet fieldSet) { "The normalization settings for the fields in " + fieldSet + " are inconsistent " + "(explicitly or because of field type). This may lead to recall and ranking issues. " + "See " + fieldSetDocUrl); - return; } } } @@ -108,7 +117,6 @@ private void checkStemming(Schema schema, FieldSet fieldSet) { "' are inconsistent (explicitly or because of field type). " + "This may lead to recall and ranking issues. " + "See " + fieldSetDocUrl); - return; } } } diff --git a/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java index 7776cc926a5..610655c5810 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java @@ -2,12 +2,19 @@ package com.yahoo.schema.processing; import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.schema.ApplicationBuilder; import com.yahoo.schema.derived.TestableDeployLogger; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.parser.ParseException; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static com.yahoo.schema.ApplicationBuilder.createFromStrings; +import static com.yahoo.schema.document.MatchType.EXACT; +import static com.yahoo.schema.document.MatchType.WORD; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; public class FieldSetSettingsTestCase { @@ -24,7 +31,8 @@ public void warnableFieldTypeMix() { assertArrayEquals(new String[]{ "For schema 'child', field 'ps': " + "The matching settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " + - "This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset", + "This may lead to recall and ranking issues. The matching setting that will be used for this fieldset is TEXT. " + + "See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset", "For schema 'child', field 'ps': " + "The normalization settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " + "This may lead to recall and ranking issues. See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"}, @@ -39,6 +47,33 @@ public void illegalFieldTypeMix() { "See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"}, logger.warnings.toArray()); } + @Test + @Disabled + // Test that match setting for a field will be a match settings one of the fields + // in the set has, not the default match setting for a field + // TODO: This now fails because setting match setting for a fieldset is done after + // checking if there are inconsistencies in match settings for fields in a fieldset, + // but code today return if it finds such an inconsistency WITHOUT setting match + // setting for the fieldset, which means it will end up being the default match setting + // (TEXT). As shown in this test, it should be either WORD or EXACT (fields are + // processed in lexical order of fioeld name, so the first field will determine which match + // setting is used. + public void inconsistentMatchingShouldStillSetMatchingForFieldSet() throws ParseException { + var logger = new TestableDeployLogger(); + + // a is field with word mathcing => word matching for fieldset + var builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "a", "b")); + assertMatchType(builder, WORD); + + // a is field with exact mathcing => exact matchong for fieldset + builder = createFromStrings(logger, schemaWithMatchSettings("fieldset default { fields: a, b }", "b", "a")); + assertMatchType(builder, EXACT); + } + + private static void assertMatchType(ApplicationBuilder builder, MatchType matchType) { + var fieldSet = builder.getSchema().fieldSets().userFieldSets().values().iterator().next(); + assertEquals(matchType, fieldSet.getMatching().getType()); + } private static String childSd(String fieldSet) { return """ @@ -64,6 +99,7 @@ field ct type tensor(x[2]) { } """; } + private static String parentSd() { return """ schema parent { @@ -81,4 +117,23 @@ field pt type tensor(x[2]) { } """; } + + private static String schemaWithMatchSettings(String fieldSet, String fieldNameWithWordMatching, String fieldNameWithExactMatching) { + return """ + schema index_variants { + document index_variants { + field %s type string { + indexing: index + match: word + } + field %s type string { + indexing: index + match: exact + } + } + %s + } + """.formatted(fieldNameWithWordMatching, fieldNameWithExactMatching, fieldSet); + } + } From 7f396b8ee14523239aa1cebf445f7001b4ad33f8 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 18 Dec 2024 19:38:02 +0000 Subject: [PATCH 2/2] expand warning with information about fields. make sure that we report back the original fieldset matching as the effective outcome for now. --- .../schema/processing/FieldSetSettings.java | 25 +++++++++++++++---- .../processing/FieldSetSettingsTestCase.java | 7 +++--- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java index f1937255273..f23712d8cac 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java @@ -71,11 +71,26 @@ private void checkMatching(Schema schema, FieldSet fieldSet) { matching = fieldMatching; } else { if ( ! matching.equals(fieldMatching)) { - warn(schema, field.asField(), - "The matching settings for the fields in " + fieldSet + " are inconsistent " + - "(explicitly or because of field type). This may lead to recall and ranking issues. " + - "The matching setting that will be used for this fieldset is " + matching.getType() + ". " + - "See " + fieldSetDocUrl); + final var buf = new StringBuilder(); + buf.append("For schema '").append(schema.getName()).append("': "); + buf.append("The matching settings in ").append(fieldSet); + buf.append(" are inconsistent (explicitly or because of field type). "); + buf.append("This may lead to recall and ranking issues. "); + Matching original = fieldSet.getMatching(); + if (original == null) { + buf.append("The fieldset will use matching TEXT. "); + } else { + buf.append("The fieldset will use matching ").append(original.getType()).append(". "); + } + var list = fieldSet.getFieldNames().stream() + .map(name -> schema.getField(name)) + .filter(f -> (f != null)) + .filter(f -> (f.getMatching() != null)) + .map(f -> " Field '" + f.asField().getName() + "' has matching " + f.getMatching().getType()) + .toList(); + buf.append(list); + buf.append(" See ").append(fieldSetDocUrl); + deployLogger.logApplicationPackage(Level.WARNING, buf.toString()); // TODO: Remove (see FieldSetSettingsTestCase#inconsistentMatchingShouldStillSetMatchingForFieldSet) // but when doing so matching for a fieldset might change return; diff --git a/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java index 610655c5810..e963b47bca5 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/FieldSetSettingsTestCase.java @@ -29,9 +29,10 @@ public void warnableFieldTypeMix() { var logger = new TestableDeployLogger(); assertDoesNotThrow(() -> createFromStrings(logger, childSd("fieldset default { fields: ci,ps }"), parentSd())); assertArrayEquals(new String[]{ - "For schema 'child', field 'ps': " + - "The matching settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " + - "This may lead to recall and ranking issues. The matching setting that will be used for this fieldset is TEXT. " + + "For schema 'child': " + + "The matching settings in fieldset 'default' are inconsistent (explicitly or because of field type). " + + "This may lead to recall and ranking issues. The fieldset will use matching TEXT. " + + "[ Field 'ci' has matching TEXT, Field 'ps' has matching WORD] " + "See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset", "For schema 'child', field 'ps': " + "The normalization settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +