Skip to content

Commit

Permalink
No issue: Fix file leaks in tests
Browse files: browse the repository at this point in the history
  • Loading branch information
reckart committed Aug 3, 2024
1 parent 55942bb commit ed181f1
Showing 1 changed file with 37 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,84 +30,99 @@
* Use {@link #load(File)} to initialize.
*/
public class TextFormatVectorizer
implements Vectorizer
implements Vectorizer
{
private Map<String, float[]> embeddings;
private float[] unknownVector;
private int dimensions;
private boolean caseless;

private TextFormatVectorizer(Map<String, float[]> embeddings)
private TextFormatVectorizer(Map<String, float[]> aEmbeddings)
{
assert !embeddings.isEmpty();
this.embeddings = embeddings;
dimensions = embeddings.values().iterator().next().length;
assert !aEmbeddings.isEmpty();
embeddings = aEmbeddings;
dimensions = aEmbeddings.values().iterator().next().length;
unknownVector = VectorizerUtils.randomVector(dimensions);
caseless = embeddings.keySet().stream()
caseless = aEmbeddings.keySet().stream()
.allMatch(token -> token.equals(token.toLowerCase()));
}

/**
* Load a text-format embeddings file (assuming no header line).
*
* @param f the {@link File} containing the embeddings in text format
* @param f
* the {@link File} containing the embeddings in text format
* @return a new {@link TextFormatVectorizer}
* @throws IOException if an I/O error occurs
* @throws IOException
* if an I/O error occurs
*/
public static Vectorizer load(File f)
throws IOException
public static Vectorizer load(File f) throws IOException
{
return load(f, false);
}

/**
* Load a text-format embeddings file.
*
* @param embeddingsFile the {@link File} containing the embeddings in text format
* @param hasHeaderLine if true, the first line in the file is expected to be a header line
* @param embeddingsFile
* the {@link File} containing the embeddings in text format
* @param hasHeaderLine
* if true, the first line in the file is expected to be a header line
* @return a new {@link TextFormatVectorizer}
* @throws IOException if an I/O error occurs
* @throws IOException
* if an I/O error occurs
*/
@SuppressWarnings("WeakerAccess")
public static Vectorizer load(File embeddingsFile, boolean hasHeaderLine)
throws IOException
public static Vectorizer load(File embeddingsFile, boolean hasHeaderLine) throws IOException
{
return new TextFormatVectorizer(
TextFormatVectorizerUtils.readEmbeddingFileTxt(embeddingsFile, hasHeaderLine));
}

@Override public float[] vectorize(String token)
@Override
public float[] vectorize(String token)
{
if (caseless) {
token = token.toLowerCase();
}

float[] vector = contains(token) ? embeddings.get(token) : unknownVector();
assert vector.length == dimensions();
return vector;
}

@Override public boolean contains(String token)
@Override
public boolean contains(String token)
{
return embeddings.containsKey(token);
}

@Override public float[] unknownVector()
@Override
public float[] unknownVector()
{
return unknownVector;
}

@Override public int dimensions()
@Override
public int dimensions()
{
return dimensions;
}

@Override public int size()
@Override
public int size()
{
return embeddings.size();
}

@Override public boolean isCaseless()
@Override
public boolean isCaseless()
{
return caseless;
}

@Override
public void close() throws Exception
{
// Nothing to do
}
}

0 comments on commit ed181f1

Please sign in to comment.