Merge branch 'release/2.1.5'
Andrew Clegg committed Sep 11, 2013
2 parents e1fc5ec + 6b1ba4d commit 2d077df
Showing 8 changed files with 162 additions and 44 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,17 @@
Release 2.1.5
-------------

More documentation improvements, code cleanup.

Workaround for a compiler issue in some versions of Java 6:

https://github.com/pearson-enabling-technologies/elasticsearch-approx-plugin/issues/41

Release 2.1.4
-------------

Documentation improvements.

Release 2.1.3
-------------

4 changes: 2 additions & 2 deletions README.md
@@ -17,11 +17,11 @@ Plugin < 1.3.0: ElasticSearch 0.19.X, tested on 0.19.11

Plugin 1.3.X: ElasticSearch 0.20.X, tested on 0.20.6

Plugin 2.1.4: ElasticSearch 0.90.2, plus significant feature and performance improvements, and breaking API changes
Plugin 2.1.5: ElasticSearch 0.90.2, plus significant feature and performance improvements, and breaking API changes, compared to the 1.3.X branch

ElasticSearch 0.90.3 is not supported yet.

**N.B.** If you are upgrading from a previous version to 2.1.0, please read the
**N.B.** If you are upgrading from a previous version to 2.1.X, please read the
following carefully, as the syntax (and semantics) have changed in several places.


2 changes: 1 addition & 1 deletion pom.xml
@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.pearson.entech</groupId>
<artifactId>elasticsearch-approx-plugin</artifactId>
<version>2.1.4</version>
<version>2.1.5</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
TimestampFirstCollector.java
@@ -17,8 +17,16 @@
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;

/**
* A buildable collector which iterates through the values of a long datetime field, applying timezone rounding to them.
*
* @param <V> the IndexFieldData type of the datetime field
*/
public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extends ScriptDocValues>> extends BuildableCollector {

/**
* An empty iterator over long values.
*/
protected static final Iter EMPTY = new Iter.Empty();

private LongValues _keyFieldValues;
@@ -36,20 +36,34 @@ public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extend
private BytesValues _valueFieldValues;
private BytesValues.Iter _valueFieldIter;

/**
* Create a new collector.
*
* @param keyFieldData key (datetime) field data
* @param valueFieldData value field data
* @param tzRounding time zone rounding
*/
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
final IndexFieldData<V> valueFieldData, final TimeZoneRounding tzRounding) {
_keyFieldData = keyFieldData;
_valueFieldData = valueFieldData;
_tzRounding = tzRounding;
}

/**
* Create a new collector.
*
* @param keyFieldData key (datetime) field data
* @param tzRounding time zone rounding
*/
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
final TimeZoneRounding tzRounding) {
this(keyFieldData, null, tzRounding);
}

@Override
public void collect(final int doc) throws IOException {
// If the datetime field has ordinals available, we can take a bunch of shortcuts later
if(_keyFieldValues instanceof WithOrdinals) {
_docOrds = ((WithOrdinals) _keyFieldValues).ordinals().getOrds(doc);
_docOrdPointer = _docOrds.offset;
@@ -66,16 +88,35 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
if(hasValueField())
_valueFieldValues = _valueFieldData.load(context).getBytesValues();

// If we have ordinals available, we can do most of the work up front.
// We build a mapping from ords to rounded timestamps, so we never
// have to retrieve the field values for a given document. We just
// see which ordinals it has and then get the rounded timestamps they
// correspond to.

// One drawback of this approach is that if we have a very aggressively
// filtered query, there might be many ordinals which are never used by
// any of the documents we will be looking at. So we'd be wasting effort
// by calculating timestamps for all of the ordinals up front.
// TODO come up with a heuristic to avoid falling into this trap.
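// Hypothetical illustration (datetime values invented, assuming 1-second
// rounding): suppose the segment's ords map to raw datetimes as follows.
//   ord 1 -> 1378900000123 ms -> rounds to 1378900000000 -> new timestamp, pointer 1
//   ord 2 -> 1378900000456 ms -> same second as before   -> reuse pointer 1
//   ord 3 -> 1378900002789 ms -> rounds to 1378900002000 -> new timestamp, pointer 2
// A doc whose ords are {2, 3} is then resolved to rounded timestamps purely
// via these pointers, without re-reading its raw datetime values.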

if(_keyFieldValues instanceof WithOrdinals) {
final int maxOrd = ((WithOrdinals) _keyFieldValues).ordinals().getMaxOrd();
int tsPointer = 0;

// _timestamps holds the rounded timestamps
_timestamps.resetQuick();
_timestamps.add(0);

// _ordToTimestampPointers has one entry for every ord
_ordToTimestampPointers.resetQuick();
_ordToTimestampPointers.add(0);

// We cache these for some small optimizations
long lastDateTime = 0;
long lastTimestamp = 0;
for(int i = 1; i < maxOrd; i++) {
// Get the next ordinal's value so we can calculate its timestamp
final long datetime = ((WithOrdinals) _keyFieldValues).getValueByOrd(i);

// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
@@ -95,6 +136,8 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
}
}
lastDateTime = datetime;

// Add timestamp pointer for this ord -- could be the same as the previous ord, or a new one
_ordToTimestampPointers.add(tsPointer);
}
} else {
@@ -105,6 +148,11 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
@Override
public void postCollection() {}

/**
* Are there any more timestamps available?
*
* @return true/false
*/
protected boolean hasNextTimestamp() {
if(_keyFieldValues instanceof WithOrdinals) {
return _docOrdPointer < _docOrds.length;
@@ -113,12 +161,19 @@ protected boolean hasNextTimestamp() {
}
}

/**
* Get the next timestamp, i.e. the rounded value of the next available datetime.
*
* @return the timestamp
*/
protected long nextTimestamp() {
if(_keyFieldValues instanceof WithOrdinals) {
// We can bypass getting the raw datetime value, and go from ord to timestamp directly (well, directly-ish)
final long ts = _timestamps.get(_ordToTimestampPointers.get(_docOrds.ints[_docOrdPointer]));
_docOrdPointer++;
return ts;
} else {
// Get the next raw datetime, and if necessary, round it
final long datetime = _docIter.next();
// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
// (true because we don't support granularity less than 1 sec)
@@ -134,14 +189,31 @@ protected long nextTimestamp() {
}
}

/**
* Returns true if this collector is getting each timestamp once per value of a value field.
* Otherwise, it's getting each timestamp once per document.
*
* @return true/false
*/
protected boolean hasValueField() {
return _valueFieldData != null;
}

/**
* Returns true if there is another value of a value field available, for the current doc.
* If there isn't, or we're not using a value field, returns false.
*
* @return true/false
*/
protected boolean hasNextValue() {
return _valueFieldIter != null && _valueFieldIter.hasNext();
}

/**
* Gets the next value of the value field, or null if we're not using a value field.
*
* @return the next value as a BytesRef, or null
*/
protected BytesRef nextValue() {
return _valueFieldIter == null ? null : _valueFieldIter.next();
}
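
// A hypothetical sketch (not from this repository) of how a subclass's
// collect(int) is presumably expected to drive the methods above:
//
//   super.collect(doc);                  // position iterators for this doc
//   while(hasNextTimestamp()) {
//       final long ts = nextTimestamp(); // rounded timestamp
//       while(hasNextValue()) {
//           final BytesRef value = nextValue();
//           // record (ts, value) in the facet being built
//       }
//       // when no value field is in use, just record ts itself
//   }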
XContentEnabledList.java
@@ -8,31 +8,65 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;

/**
* An ArrayList that also implements ToXContent. It has a name
* attribute which is used as its field name in XContent output; the
* list elements are then rendered as an XContent list using their own
* toXContent() methods.
*
* @param <E> list element type; must in turn implement ToXContent
*/
public class XContentEnabledList<E extends ToXContent>
extends ArrayList<E> implements ToXContent {

private static final long serialVersionUID = 1L;

private final String _name;

private final XContentBuilderString _xName;

/**
* Create a list by copying in the values of the other collection.
*
* @param data the collection to copy
* @param name the name of the new list
*/
public XContentEnabledList(final Collection<? extends E> data, final String name) {
super(data);
_name = name;
_xName = null;
}

/**
* Create a list with the initial capacity specified.
*
* @param initialCapacity the starting capacity
* @param name the name of the new list
*/
public XContentEnabledList(final int initialCapacity, final String name) {
super(initialCapacity);
_name = name;
_xName = null;
}

/**
* Create an empty list.
*
* @param name the name of the new list
*/
public XContentEnabledList(final String name) {
_name = name;
_xName = null;
}

public XContentEnabledList(final int size, final XContentBuilderString name) {
/**
* Create a list with the initial capacity specified.
*
* @param initialCapacity the starting capacity
* @param name the name of the new list, as XContent
*/
public XContentEnabledList(final int initialCapacity, final XContentBuilderString name) {
super(initialCapacity);
_name = null;
_xName = name;
}
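
For orientation, a minimal usage sketch (not from the repository; SomeEntry and someEntry are hypothetical stand-ins for any element type implementing ToXContent):

import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;

// Build a named list and render it inside an enclosing object.
final XContentEnabledList<SomeEntry> entries = new XContentEnabledList<SomeEntry>("entries");
entries.add(someEntry); // any ToXContent instance (hypothetical)
final XContentBuilder builder = JsonXContent.contentBuilder();
builder.startObject();
entries.toXContent(builder, ToXContent.EMPTY_PARAMS); // should emit "entries": [ ... ]
builder.endObject();
// builder.string() should now resemble {"entries":[...]}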
DateFacetExecutor.java
Expand Up @@ -2,7 +2,6 @@

import org.elasticsearch.common.joda.TimeZoneRounding;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.LongValues.Iter;
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;
import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
@@ -15,49 +14,45 @@
import com.pearson.entech.elasticsearch.search.facet.approx.date.collectors.SlicedDistinctCollector;
import com.pearson.entech.elasticsearch.search.facet.approx.date.collectors.TimestampFirstCollector;

/**
* Executor for all date facets.
*/
public class DateFacetExecutor extends FacetExecutor {

private static final Iter __emptyIter = new Iter.Empty();
private final TimestampFirstCollector<?> _collector;

private final LongArrayIndexFieldData _keyFieldData;
private final IndexFieldData _valueFieldData;
private final IndexFieldData _distinctFieldData;
private final IndexFieldData _sliceFieldData;
// TODO proper use of generics

private final TimestampFirstCollector _collector;
/**
* Create a new executor.
*
* @param keyFieldData field data for the datetime field used for timestamps
* @param valueFieldData field data for the optional value field, can be null
* @param distinctFieldData field data for the optional distinct field, can be null
* @param sliceFieldData field data for the optional slice field, can be null
* @param tzRounding a timezone rounding object
* @param exactThreshold exact count threshold when doing distincts
*/
public DateFacetExecutor(final LongArrayIndexFieldData keyFieldData, final IndexFieldData<?> valueFieldData,
final IndexFieldData<?> distinctFieldData, final IndexFieldData<?> sliceFieldData,
final TimeZoneRounding tzRounding, final int exactThreshold) {

private final TimeZoneRounding _tzRounding;

private final int _exactThreshold;

public DateFacetExecutor(final LongArrayIndexFieldData keyFieldData, final IndexFieldData valueFieldData,
final IndexFieldData distinctFieldData, final IndexFieldData sliceFieldData,
final TimeZoneRounding tzRounding, final int exactThreshold, final boolean debug) {
_keyFieldData = keyFieldData;
_valueFieldData = valueFieldData;
_distinctFieldData = distinctFieldData;
_sliceFieldData = sliceFieldData;
_tzRounding = tzRounding;
_exactThreshold = exactThreshold;

// TODO type safety for the following constructors

if(_distinctFieldData == null && _sliceFieldData == null)
if(_valueFieldData == null)
if(distinctFieldData == null && sliceFieldData == null)
if(valueFieldData == null)
_collector = new CountingCollector<NullFieldData>(keyFieldData, tzRounding);
else
_collector = new CountingCollector(keyFieldData, _valueFieldData, tzRounding);
else if(_distinctFieldData == null)
if(_valueFieldData == null)
_collector = new CountingCollector(keyFieldData, valueFieldData, tzRounding);
else if(distinctFieldData == null)
if(valueFieldData == null)
_collector = new SlicedCollector(keyFieldData, sliceFieldData, tzRounding);
else
_collector = new SlicedCollector(keyFieldData, valueFieldData, sliceFieldData, tzRounding);
else if(_sliceFieldData == null)
if(_valueFieldData == null)
else if(sliceFieldData == null)
if(valueFieldData == null)
_collector = new DistinctCollector(keyFieldData, distinctFieldData, tzRounding, exactThreshold);
else
throw new FacetPhaseExecutionException("unknown date_facet", "Can't use distinct_field and value_field together");
else if(_valueFieldData == null)
else if(valueFieldData == null)
_collector = new SlicedDistinctCollector(keyFieldData, sliceFieldData, distinctFieldData, tzRounding, exactThreshold);
else
throw new FacetPhaseExecutionException("unknown date_facet", "Can't use distinct_field and value_field together");
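// Summary of the dispatch above, for readability ("yes" = field supplied):
//
//   value_field  distinct_field  slice_field   collector chosen
//   no           no              no            CountingCollector (plain counts)
//   yes          no              no            CountingCollector (value field)
//   no           no              yes           SlicedCollector
//   yes          no              yes           SlicedCollector (value field)
//   no           yes             no            DistinctCollector
//   no           yes             yes           SlicedDistinctCollector
//   yes          yes             any           FacetPhaseExecutionException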