Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect conflict between tensor field and non-tensor field in fieldset. #32174

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.document.TensorDataType;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.FieldSet;
Expand All @@ -11,6 +12,9 @@
import com.yahoo.schema.document.Stemming;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.LinkedList;
import java.util.logging.Level;

/**
* Computes the right "index commands" for each fieldset in a search definition.
*
Expand All @@ -22,6 +26,8 @@
// (this requires adding normalizing and stemming settings to FieldSet).
public class FieldSetSettings extends Processor {

private static String fieldSetDocUrl = "https://docs.vespa.ai/en/reference/schema-reference.html#fieldset";

public FieldSetSettings(Schema schema,
DeployLogger deployLogger,
RankProfileRegistry rankProfileRegistry,
Expand All @@ -37,6 +43,7 @@ public void process(boolean validate, boolean documentsOnly) {
checkMatching(schema, fieldSet);
checkNormalization(schema, fieldSet);
checkStemming(schema, fieldSet);
checkTypes(schema, fieldSet);
}
}

Expand Down Expand Up @@ -104,4 +111,23 @@ private void checkStemming(Schema schema, FieldSet fieldSet) {
}
}

private void checkTypes(Schema schema, FieldSet fieldSet) {
var tensorFields = new LinkedList<String>();
var nonTensorFields = new LinkedList<String>();
for (String fieldName : fieldSet.getFieldNames()) {
ImmutableSDField field = schema.getField(fieldName);
if (field.getDataType() instanceof TensorDataType) {
tensorFields.add(field.getName());
} else {
nonTensorFields.add(field.getName());
}
}
if (!tensorFields.isEmpty() && !nonTensorFields.isEmpty()) {
var fullMsg = "For schema '" + schema.getName() + "', fieldset '" + fieldSet.getName() + "': " +
"Tensor fields ['" + String.join("', '", tensorFields) + "'] " +
"cannot be mixed with non-tensor fields ['" + String.join("', '", nonTensorFields) + "'] " +
"in the same fieldset. See " + fieldSetDocUrl;
deployLogger.logApplicationPackage(Level.WARNING, fullMsg);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.model.application.provider.BaseDeployLogger;
import com.yahoo.schema.derived.TestableDeployLogger;
import org.junit.jupiter.api.Test;

import static com.yahoo.schema.ApplicationBuilder.createFromStrings;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;

public class FieldSetSettingsTestCase {

@Test
public void legalFieldTypeMix() {
assertDoesNotThrow(() -> createFromStrings(new BaseDeployLogger(), childSd("fieldset default { fields: ci,pi }"), parentSd()));
assertDoesNotThrow(() -> createFromStrings(new BaseDeployLogger(), childSd("fieldset default { fields: ct,pt }"), parentSd()));
}

@Test
public void warnableFieldTypeMix() {
var logger = new TestableDeployLogger();
assertDoesNotThrow(() -> createFromStrings(logger, childSd("fieldset default { fields: ci,ps }"), parentSd()));
assertArrayEquals(new String[]{
"For schema 'child', field 'ps': " +
"The matching settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues.",
"For schema 'child', field 'ps': " +
"The normalization settings for the fields in fieldset 'default' are inconsistent (explicitly or because of field type). " +
"This may lead to recall and ranking issues."}, logger.warnings.toArray());
}

@Test
public void illegalFieldTypeMix() {
var logger = new TestableDeployLogger();
assertDoesNotThrow(() -> createFromStrings(logger, childSd( "fieldset default { fields: ci, pt }"), parentSd()));
assertArrayEquals(new String[]{"For schema 'child', fieldset 'default': Tensor fields ['pt'] cannot be mixed with non-tensor fields ['ci'] in the same fieldset. " +
"See https://docs.vespa.ai/en/reference/schema-reference.html#fieldset"}, logger.warnings.toArray());
}


private static String childSd(String fieldSet) {
return """
schema child {
document child {
field ci type int {
indexing: attribute
}
field cs type string {
indexing: attribute
}
field ct type tensor(x[2]) {
indexing: attribute
}
field parent_ref type reference<parent> {
indexing: attribute
}
}
import field parent_ref.pi as pi { }
import field parent_ref.ps as ps { }
import field parent_ref.pt as pt { }
""" + fieldSet + """
}
""";
}
private static String parentSd() {
return """
schema parent {
document parent {
field pi type int {
indexing: attribute
}
field ps type string {
indexing: attribute
}
field pt type tensor(x[2]) {
indexing: attribute
}
}
}
""";
}
}