Skip to content

Commit

Permalink
Harden BaseDocValuesFormatTestCase (#13346) (#13347)
Browse files Browse the repository at this point in the history
We hit a Codec bug in Elasticsearch, but it went unnoticed because our
tests extend from BaseDocValuesFormatTestCase, which doesn't attempt to
read the doc-values of the same document twice. This change strengthens
BaseDocValuesFormatTestCase checks and randomly inserts that access
pattern.
  • Loading branch information
dnhatn authored May 7, 2024
1 parent d839086 commit b9457b7
Showing 1 changed file with 224 additions and 105 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,105 @@ protected void assertDVIterate(Directory dir) throws IOException {
ir.close();
}

protected void compareStoredFieldWithSortedNumericsDV(
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
for (LeafReaderContext leaf : directoryReader.leaves()) {
LeafReader reader = leaf.reader();
StoredFields storedFields = reader.storedFields();
SortedNumericDocValues docValues = reader.getSortedNumericDocValues(dvField);
if (docValues == null) {
// no stored values at all
for (int doc = 0; doc < reader.maxDoc(); doc++) {
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
}
continue;
}
for (int doc = 0; doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (storedValues.length == 0) {
assertFalse(docValues.advanceExact(doc));
continue;
}

switch (random().nextInt(3)) {
case 0:
assertEquals(doc, docValues.nextDoc());
break;
case 1:
assertEquals(doc, docValues.advance(doc));
break;
default:
assertTrue(docValues.advanceExact(doc));
break;
}
assertEquals(doc, docValues.docID());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
}
// jump with advanceExact
int iters = 1 + random().nextInt(3);
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedNumericDocValues(dvField);
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (docValues.advanceExact(doc)) {
assertEquals(doc, docValues.docID());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
} else {
assertArrayEquals(new String[0], storedValues);
}
doc += random().nextInt(5); // skip some docs
}
}
// jump with advance
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedNumericDocValues(dvField);
int doc = random().nextInt(leaf.reader().maxDoc());
while (doc != NO_MORE_DOCS) {
int nextDoc = docValues.advance(doc);
// no stored fields in between
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
String[] storedValues = storedFields.document(d).getValues(storedField);
assertArrayEquals(new String[0], storedValues);
}
doc = nextDoc;
if (doc != NO_MORE_DOCS) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
doc = nextDoc + 1;
doc += random().nextInt(5); // skip some docs
}
}
}
}
}

private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values)
throws Exception {
Directory dir = newDirectory();
Expand Down Expand Up @@ -1447,38 +1546,18 @@ private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplie
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}

try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);

writer.close();

// compare
DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
for (int i = 0; i < r.maxDoc(); i++) {
if (i > docValues.docID()) {
docValues.nextDoc();
}
String[] expected = storedFields.document(i).getValues("stored");
if (i < docValues.docID()) {
assertEquals(0, expected.length);
} else {
String[] actual = new String[docValues.docValueCount()];
for (int j = 0; j < actual.length; j++) {
actual[j] = Long.toString(docValues.nextValue());
}
assertArrayEquals(expected, actual);
}
}
try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
}
ir.close();
dir.close();
IOUtils.close(writer, dir);
}

public void testBooleanNumericsVsStoredFields() throws Exception {
Expand Down Expand Up @@ -2270,6 +2349,116 @@ public void testSortedSetTermsEnum() throws IOException {
directory.close();
}

protected void compareStoredFieldWithSortedSetDV(
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
for (LeafReaderContext leaf : directoryReader.leaves()) {
LeafReader reader = leaf.reader();
StoredFields storedFields = reader.storedFields();
SortedSetDocValues docValues = reader.getSortedSetDocValues(dvField);
if (docValues == null) {
// no stored values at all
for (int doc = 0; doc < reader.maxDoc(); doc++) {
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
}
continue;
}
// sequentially
for (int doc = 0; doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (storedValues.length == 0) {
assertFalse(docValues.advanceExact(doc));
continue;
}
switch (random().nextInt(3)) {
case 0:
assertEquals(doc, docValues.nextDoc());
break;
case 1:
assertEquals(doc, docValues.advance(doc));
break;
default:
assertTrue(docValues.advanceExact(doc));
break;
}
assertEquals(doc, docValues.docID());
assertEquals(storedValues.length, docValues.docValueCount());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
if (random().nextBoolean()) {
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
// jump with advanceExact
int iters = 1 + random().nextInt(3);
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedSetDocValues(dvField);
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (docValues.advanceExact(doc)) {
assertEquals(doc, docValues.docID());
assertEquals(storedValues.length, docValues.docValueCount());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
if (random().nextBoolean()) {
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
} else {
assertArrayEquals(new String[0], storedValues);
}
doc += random().nextInt(5); // skip some docs
}
}
// jump with advance
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedSetDocValues(dvField);
int doc = random().nextInt(leaf.reader().maxDoc());
while (doc != NO_MORE_DOCS) {
int nextDoc = docValues.advance(doc);
// no stored fields in between
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
String[] storedValues = storedFields.document(d).getValues(storedField);
assertArrayEquals(new String[0], storedValues);
}
doc = nextDoc;
if (doc != NO_MORE_DOCS) {
int repeats = 1 + random().nextInt(3);
String[] storedValues = storedFields.document(doc).getValues(storedField);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
if (random().nextBoolean()) {
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
doc = nextDoc + 1;
doc += random().nextInt(5); // skip some docs
}
}
}
}
}

protected void doTestSortedSetVsStoredFields(
int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues)
throws Exception {
Expand Down Expand Up @@ -2316,93 +2505,23 @@ protected void doTestSortedSetVsStoredFields(
writer.commit();
}
}

// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
if (VERBOSE) {
System.out.println("\nTEST: now delete " + numDeletions + " docs");
}
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}

// compare
if (VERBOSE) {
System.out.println("\nTEST: now get reader");
}
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String[] stringValues = storedFields.document(i).getValues("stored");
if (docValues != null) {
if (docValues.docID() < i) {
docValues.nextDoc();
}
}
if (docValues != null && stringValues.length > 0) {
assertEquals(i, docValues.docID());
assertEquals(stringValues.length, docValues.docValueCount());
for (String stringValue : stringValues) {
assert docValues != null;
long ord = docValues.nextOrd();
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValue, scratch.utf8ToString());
}
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
if (VERBOSE) {
System.out.println("\nTEST: now close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: force merge");
try (DirectoryReader reader = writer.getReader()) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
}
writer.forceMerge(1);

// compare again
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String[] stringValues = storedFields.document(i).getValues("stored");
if (docValues.docID() < i) {
docValues.nextDoc();
}
if (stringValues.length > 0) {
assertEquals(i, docValues.docID());
assertEquals(stringValues.length, docValues.docValueCount());
for (String stringValue : stringValues) {
assert docValues != null;
long ord = docValues.nextOrd();
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValue, scratch.utf8ToString());
}
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
if (VERBOSE) {
System.out.println("TEST: close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: close writer");
}
writer.close();
if (VERBOSE) {
System.out.println("TEST: close dir");
try (DirectoryReader reader = writer.getReader()) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
}
dir.close();
IOUtils.close(writer, dir);
}

public void testSortedSetFixedLengthVsStoredFields() throws Exception {
Expand Down

0 comments on commit b9457b7

Please sign in to comment.