Skip to content

Commit

Permalink
dkpro#66 Add inflection group to word form
Browse files Browse the repository at this point in the history
  - Added a test case for multiple declension tables
  - Corrected index generation for the multiple declension tables case
  • Loading branch information
highsource committed Dec 19, 2018
1 parent 4165136 commit 744c7f7
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 199 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,21 @@
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.parser.de.components.DEGenderText;
import de.tudarmstadt.ukp.jwktl.parser.util.IWiktionaryWordFormTemplateParameterHandler;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;

public class DEWordFormNounTableHandler implements IWiktionaryWordFormTemplateParameterHandler {

public static final int MAX_INFLECTION_GROUP_COUNT = 4;

/**
 * Discards every genus recorded so far by replacing the {@code genera} store
 * with a fresh, empty container.
 */
public void reset() {
this.genera = new DEGenderText[4];
// For the HashMap, MAX_INFLECTION_GROUP_COUNT is only the initial capacity;
// it does not limit how many entries the map can hold.
this.genera = new HashMap<>(DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT);
}

private List<? extends IWiktionaryWordFormTemplateParameterHandler> handlers = Arrays.asList(
Expand All @@ -51,32 +55,24 @@ public void reset() {
// Accusative
new AccusativeHandler());

protected DEGenderText[] genera = new DEGenderText[4];
protected Map<Integer, DEGenderText> genera = new HashMap<>(DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT);

/**
* Returns genus by index.
* @param index index of the genus, must be between 1 and 4.
* @param index index of the genus.
* @return Genus by index or <code>null</code> if genus by this index was not set yet.
* @throws IllegalArgumentException If index is not between 1 and 4.
*/
DEGenderText getGenusByIndex(int index) {
// Range guard: only the indexes 1, 2, 3 and 4 pass.
if (index < 1 || index > 4) {
throw new IllegalArgumentException("Genus index must be 1, 2, 3 or 4.");
}
// The store is addressed zero-based, hence the shift by one.
return genera[index - 1];
// Map lookup: yields null when nothing was stored under the shifted key.
return genera.get(index - 1);
}

/**
* Sets genus by index
* @param genderText genus.
* @param index index of the genus, must be between 1 and 4.
* @throws IllegalArgumentException If index is not between 1 and 4.
* @param index index of the genus.
*/
void setGenusByIndex(DEGenderText genderText, Integer index) {
// Comparing the boxed Integer unboxes it, so a null index fails here with a
// NullPointerException before the range check can report it.
if (index < 1 || index > 4) {
throw new IllegalArgumentException("Genus index must be 1, 2, 3 or 4.");
}
// The store is addressed zero-based, hence the shift by one.
this.genera[index - 1] = genderText;
void setGenusByIndex(DEGenderText genderText, int index) {
// Same zero-based shift for the map key; an existing entry under that key is replaced.
this.genera.put(index - 1, genderText);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
******************************************************************************/
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;

import de.tudarmstadt.ukp.jwktl.api.IWiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryEntry;
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;
import de.tudarmstadt.ukp.jwktl.parser.util.PatternUtils;
Expand All @@ -36,14 +39,28 @@ public PatternBasedIndexedParameterHandler(DEWordFormNounTableHandler nounTableH

/**
 * Matches the label against this handler's pattern and, when it matches,
 * forwards the parameter to {@code handleIfFound} together with the index
 * taken from the label.
 *
 * @param label    template parameter name that is matched against the pattern.
 * @param value    template parameter value, passed through unchanged.
 * @param wordForm word form passed through to {@code handleIfFound}.
 * @param context  parsing context; {@code findEntry()} is called on it
 *                 unconditionally, so it must not be {@code null}.
 */
public void handle(String label, String value, WiktionaryWordForm wordForm, ParsingContext context) {
final Matcher matcher = pattern.matcher(label);

WiktionaryEntry wiktionaryEntry = context.findEntry();

List<IWiktionaryWordForm> wordForms = wiktionaryEntry.getWordForms();
// Offset added to the label's index; derived from the inflection groups of
// the word forms the entry already holds.
final int indexOffset;
if (wordForms == null) {
// No word form list on the entry: no offset is applied.
indexOffset = 0;
} else {
// Highest inflection group among the existing word forms; 0 when the list is empty.
final int maxInflectionGroup = wordForms.stream().mapToInt(IWiktionaryWordForm::getInflectionGroup).max()
.orElse(0);
// Rounds the maximum up to a multiple of MAX_INFLECTION_GROUP_COUNT
// (with a count of 4: 1..4 -> 4, 5..8 -> 8).
// NOTE(review): for a maximum of 0 (empty but non-null list) the integer
// division -1 / 4 truncates to 0, so the offset becomes
// MAX_INFLECTION_GROUP_COUNT instead of 0 - confirm this is intended.
indexOffset = (((maxInflectionGroup - 1) / DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT) + 1)
* DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT;
}

if (matcher.find()) {
final Integer index = PatternUtils.extractIndex(matcher);
// A label without an extracted index is treated as index 1.
final int i = index == null ? 1 : index.intValue();
handleIfFound(wordForm, label, i, value, matcher, context);
final int i = index == null ? 1 : index.intValue();
handleIfFound(wordForm, label, i + indexOffset, value, matcher, context);
}
}

/**
 * Callback invoked by {@code handle} once the label has matched the pattern.
 *
 * @param wordForm word form handed to {@code handle}.
 * @param label    the matched template parameter name.
 * @param index    index derived from the label; 1 is used when the label carries none.
 * @param value    template parameter value.
 * @param matcher  matcher on which {@code find()} has already succeeded for the label.
 * @param context  parsing context handed to {@code handle}.
 */
public abstract void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value, Matcher matcher,
ParsingContext context);
public abstract void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value,
Matcher matcher, ParsingContext context);

}
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,31 @@ public void testGams() throws Exception {
assertWordFormNoun("Gamsen", ACCUSATIVE, PLURAL, null, 1, actualIter.next());
assertFalse(actualIter.hasNext());
}

/**
 * Parses "Apfelschorle.txt" and checks the sixteen word forms of its first
 * entry in iteration order: eight forms asserted with gender NEUTER (singular)
 * and 1 as fifth argument, then eight with gender FEMININE (singular) and 5.
 * NOTE(review): the fifth argument is presumably the expected inflection
 * group; confirm against assertWordFormNoun.
 */
public void testApfelschorle() throws Exception {
    final Iterator<IWiktionaryWordForm> forms =
            parse("Apfelschorle.txt").getEntry(0).getWordForms().iterator();

    // First block of forms: plural "Apfelschorles".
    assertWordFormNoun("Apfelschorle", NOMINATIVE, SINGULAR, NEUTER, 1, forms.next());
    assertWordFormNoun("Apfelschorles", NOMINATIVE, PLURAL, null, 1, forms.next());
    assertWordFormNoun("Apfelschorles", GENITIVE, SINGULAR, NEUTER, 1, forms.next());
    assertWordFormNoun("Apfelschorles", GENITIVE, PLURAL, null, 1, forms.next());
    assertWordFormNoun("Apfelschorle", DATIVE, SINGULAR, NEUTER, 1, forms.next());
    assertWordFormNoun("Apfelschorles", DATIVE, PLURAL, null, 1, forms.next());
    assertWordFormNoun("Apfelschorle", ACCUSATIVE, SINGULAR, NEUTER, 1, forms.next());
    assertWordFormNoun("Apfelschorles", ACCUSATIVE, PLURAL, null, 1, forms.next());

    // Second block of forms: plural "Apfelschorlen".
    assertWordFormNoun("Apfelschorle", NOMINATIVE, SINGULAR, FEMININE, 5, forms.next());
    assertWordFormNoun("Apfelschorlen", NOMINATIVE, PLURAL, null, 5, forms.next());
    assertWordFormNoun("Apfelschorle", GENITIVE, SINGULAR, FEMININE, 5, forms.next());
    assertWordFormNoun("Apfelschorlen", GENITIVE, PLURAL, null, 5, forms.next());
    assertWordFormNoun("Apfelschorle", DATIVE, SINGULAR, FEMININE, 5, forms.next());
    assertWordFormNoun("Apfelschorlen", DATIVE, PLURAL, null, 5, forms.next());
    assertWordFormNoun("Apfelschorle", ACCUSATIVE, SINGULAR, FEMININE, 5, forms.next());
    assertWordFormNoun("Apfelschorlen", ACCUSATIVE, PLURAL, null, 5, forms.next());

    // Nothing may follow the sixteen expected forms.
    assertFalse(forms.hasNext());
}

protected void assertWordFormNoun(final String expectedForm, final GrammaticalCase expectedCase,
final GrammaticalNumber expectedNumber, GrammaticalGender expectedGender,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,35 +37,4 @@ public void testGetsSetGenus() {
/** Reading index 3 without a prior set must yield {@code null}. */
public void testGetsNotSetGenus() {
    final Object genusAtUnsetIndex = nounTableHandler.getGenusByIndex(3);
    assertNull(genusAtUnsetIndex);
}

/**
 * Expects {@code setGenusByIndex} to throw an IllegalArgumentException for
 * the indexes 0 and 5; the test fails as soon as one call returns normally.
 */
public void testThrowsExceptionSettingGenusWithInvalidIndex() {
    for (int invalidIndex : new int[] {0, 5}) {
        try {
            nounTableHandler.setGenusByIndex(DEGenderText.F, invalidIndex);
            fail();
        } catch (IllegalArgumentException expected) {
            assertTrue(true);
        }
    }
}

/**
 * Expects {@code getGenusByIndex} to throw an IllegalArgumentException for
 * the indexes 0 and 5; the test fails as soon as one call returns normally.
 */
public void testThrowsExceptionGettingGenusWithInvalidIndex() {
    for (int invalidIndex : new int[] {0, 5}) {
        try {
            nounTableHandler.getGenusByIndex(invalidIndex);
            fail();
        } catch (IllegalArgumentException expected) {
            assertTrue(true);
        }
    }
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,88 +17,93 @@
******************************************************************************/
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryPage;
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.api.util.GrammaticalGender;
import de.tudarmstadt.ukp.jwktl.api.util.GrammaticalNumber;
import de.tudarmstadt.ukp.jwktl.parser.de.components.DEEntryFactory;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;
import junit.framework.TestCase;

public class EinzahlHandlerTest extends TestCase {

private DEWordFormNounTableHandler nounTableHandler;
private GenusHandler genusHandler;
private EinzahlHandler einzahlHandler;
private ParsingContext parsingContext;

/**
 * Builds a fresh fixture: one noun table handler shared by the genus and
 * singular handlers, plus a parsing context over an empty page.
 */
@Override
protected void setUp() throws Exception {
    final DEWordFormNounTableHandler sharedTableHandler = new DEWordFormNounTableHandler();
    this.nounTableHandler = sharedTableHandler;
    this.genusHandler = new GenusHandler(sharedTableHandler);
    this.einzahlHandler = new EinzahlHandler(sharedTableHandler);
    this.parsingContext = new ParsingContext(new WiktionaryPage(), new DEEntryFactory());
}

/**
 * Checks which labels the singular handler accepts: labels carrying the
 * "(Einzahl)" marker, either unnumbered or numbered 1 through 4, are handled;
 * a null label, a label without the marker and the number 5 are rejected.
 */
public void testCanHandle() {
assertFalse(einzahlHandler.canHandle(null, null, null, null));
assertFalse(einzahlHandler.canHandle("Wer oder was?", null, null, null));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl)", null, null, null));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 1)", null, null, null));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 2)", null, null, null));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 3)", null, null, null));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 4)", null, null, null));
assertFalse(einzahlHandler.canHandle("Wer oder was? (Einzahl 5)", null, null, null));
assertFalse(einzahlHandler.canHandle(null, null, null, parsingContext));
assertFalse(einzahlHandler.canHandle("Wer oder was?", null, null, parsingContext));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl)", null, null, parsingContext));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 1)", null, null, parsingContext));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 2)", null, null, parsingContext));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 3)", null, null, parsingContext));
assertTrue(einzahlHandler.canHandle("Wer oder was? (Einzahl 4)", null, null, parsingContext));
assertFalse(einzahlHandler.canHandle("Wer oder was? (Einzahl 5)", null, null, parsingContext));
}

/**
 * An unnumbered "Genus" of "m" followed by an unnumbered "(Einzahl)" label
 * must leave the word form singular and masculine.
 */
public void testEinzahlWithGenus() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus", "m", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl)", "test", wordForm, null);
genusHandler.handle("Genus", "m", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertEquals(GrammaticalGender.MASCULINE, wordForm.getGender());
}

/**
 * A numbered "Genus 1" of "n" combined with an unnumbered "(Einzahl)" label
 * must leave the word form singular and neuter, i.e. the unnumbered singular
 * label picks up the genus stored under number 1.
 */
public void testEinzahlWithGenus_1() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 1", "n", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl)", "test", wordForm, null);
genusHandler.handle("Genus 1", "n", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertEquals(GrammaticalGender.NEUTER, wordForm.getGender());
}

/**
 * "Genus 1" of "f" combined with "(Einzahl 1)" must leave the word form
 * singular and feminine.
 */
public void testEinzahl_1WithGenus_1() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 1", "f", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl 1)", "test", wordForm, null);
genusHandler.handle("Genus 1", "f", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl 1)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertEquals(GrammaticalGender.FEMININE, wordForm.getGender());
}

/**
 * "Genus 2" with the value "x" combined with "(Einzahl 2)" must leave the
 * word form singular while its gender stays {@code null}.
 */
public void testEinzahl_2WithGenus_2() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 2", "x", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl 2)", "test", wordForm, null);
genusHandler.handle("Genus 2", "x", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl 2)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertNull(wordForm.getGender());
}

/**
 * "Genus 3" of "m" combined with "(Einzahl 3)" must leave the word form
 * singular and masculine.
 */
public void testEinzahl_3WithGenus_3() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 3", "m", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl 3)", "test", wordForm, null);
genusHandler.handle("Genus 3", "m", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl 3)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertEquals(GrammaticalGender.MASCULINE, wordForm.getGender());
}

/**
 * Mismatched numbers: only "Genus 2" is set, but the singular label is
 * "(Einzahl 3)". The word form must still become singular, while its gender
 * stays {@code null}.
 */
public void testEinzahl_3WithGenus_2() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 2", "m", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl 3)", "test", wordForm, null);
genusHandler.handle("Genus 2", "m", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl 3)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertNull(wordForm.getGender());
}


/**
 * "Genus 4" of "n" combined with "(Einzahl 4)" must leave the word form
 * singular and neuter.
 */
public void testEinzahl_4WithGenus_4() {
WiktionaryWordForm wordForm = new WiktionaryWordForm("test");
genusHandler.handle("Genus 4", "n", wordForm, null);
einzahlHandler.handle("Wer oder was? (Einzahl 4)", "test", wordForm, null);
genusHandler.handle("Genus 4", "n", wordForm, parsingContext);
einzahlHandler.handle("Wer oder was? (Einzahl 4)", "test", wordForm, parsingContext);
assertEquals(GrammaticalNumber.SINGULAR, wordForm.getNumber());
assertEquals(GrammaticalGender.NEUTER, wordForm.getGender());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,61 +17,66 @@
******************************************************************************/
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryPage;
import de.tudarmstadt.ukp.jwktl.parser.de.components.DEEntryFactory;
import de.tudarmstadt.ukp.jwktl.parser.de.components.DEGenderText;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;
import junit.framework.TestCase;

public class GenusHandlerTest extends TestCase {

private DEWordFormNounTableHandler nounTableHandler;
private GenusHandler genusHandler;
private ParsingContext parsingContext;

/**
 * Builds a fresh fixture: a noun table handler, the genus handler bound to
 * it, and a parsing context over an empty page.
 */
@Override
protected void setUp() throws Exception {
    final DEWordFormNounTableHandler tableHandler = new DEWordFormNounTableHandler();
    this.nounTableHandler = tableHandler;
    this.genusHandler = new GenusHandler(tableHandler);
    this.parsingContext = new ParsingContext(new WiktionaryPage(), new DEEntryFactory());
}

/**
 * Checks which labels the genus handler accepts: "Genus" and "Genus 1"
 * through "Genus 4" are handled; a null label, "Suneg", "Genus " with a
 * trailing space, "Genus 0", "Genus 4.5" and "Genus 5" are rejected.
 */
public void testCanHandle() {
assertFalse(genusHandler.canHandle(null, null, null, null));
assertFalse(genusHandler.canHandle("Suneg", null, null, null));
assertTrue(genusHandler.canHandle("Genus", null, null, null));
assertFalse(genusHandler.canHandle("Genus ", null, null, null));
assertFalse(genusHandler.canHandle("Genus 0", null, null, null));
assertTrue(genusHandler.canHandle("Genus 1", null, null, null));
assertTrue(genusHandler.canHandle("Genus 2", null, null, null));
assertTrue(genusHandler.canHandle("Genus 3", null, null, null));
assertTrue(genusHandler.canHandle("Genus 4", null, null, null));
assertFalse(genusHandler.canHandle("Genus 4.5", null, null, null));
assertFalse(genusHandler.canHandle("Genus 5", null, null, null));
assertFalse(genusHandler.canHandle(null, null, null, parsingContext));
assertFalse(genusHandler.canHandle("Suneg", null, null, parsingContext));
assertTrue(genusHandler.canHandle("Genus", null, null, parsingContext));
assertFalse(genusHandler.canHandle("Genus ", null, null, parsingContext));
assertFalse(genusHandler.canHandle("Genus 0", null, null, parsingContext));
assertTrue(genusHandler.canHandle("Genus 1", null, null, parsingContext));
assertTrue(genusHandler.canHandle("Genus 2", null, null, parsingContext));
assertTrue(genusHandler.canHandle("Genus 3", null, null, parsingContext));
assertTrue(genusHandler.canHandle("Genus 4", null, null, parsingContext));
assertFalse(genusHandler.canHandle("Genus 4.5", null, null, parsingContext));
assertFalse(genusHandler.canHandle("Genus 5", null, null, parsingContext));
}

/**
 * An unnumbered "Genus" of "m" must be readable as DEGenderText.M under
 * index 1, while index 2 stays unset.
 */
public void testGenus() {
genusHandler.handle("Genus", "m", null, null);
genusHandler.handle("Genus", "m", null, parsingContext);
assertEquals(DEGenderText.M, nounTableHandler.getGenusByIndex(1));
assertNull(nounTableHandler.getGenusByIndex(2));
}

/**
 * "Genus 1" of "n" must be readable as DEGenderText.N under index 1, while
 * index 2 stays unset.
 */
public void testGenus1() {
genusHandler.handle("Genus 1", "n", null, null);
genusHandler.handle("Genus 1", "n", null, parsingContext);
assertEquals(DEGenderText.N, nounTableHandler.getGenusByIndex(1));
assertNull(nounTableHandler.getGenusByIndex(2));
}

/**
 * "Genus 2" of "pl" must be readable as DEGenderText.PL under index 2 only;
 * the neighbouring indexes 1 and 3 stay unset.
 */
public void testGenus2() {
genusHandler.handle("Genus 2", "pl", null, null);
genusHandler.handle("Genus 2", "pl", null, parsingContext);
assertNull(nounTableHandler.getGenusByIndex(1));
assertEquals(DEGenderText.PL, nounTableHandler.getGenusByIndex(2));
assertNull(nounTableHandler.getGenusByIndex(3));
}

/**
 * "Genus 3" with the value "0" must be readable as DEGenderText._0 under
 * index 3 only; the neighbouring indexes 2 and 4 stay unset.
 */
public void testGenus3() {
genusHandler.handle("Genus 3", "0", null, null);
genusHandler.handle("Genus 3", "0", null, parsingContext);
assertNull(nounTableHandler.getGenusByIndex(2));
assertEquals(DEGenderText._0, nounTableHandler.getGenusByIndex(3));
assertNull(nounTableHandler.getGenusByIndex(4));
}
/**
 * "Genus 4" with the value "x" must be readable as DEGenderText.X under
 * index 4, while index 3 stays unset.
 */
public void testGenus4() {
genusHandler.handle("Genus 4", "x", null, null);
genusHandler.handle("Genus 4", "x", null, parsingContext);
assertNull(nounTableHandler.getGenusByIndex(3));
assertEquals(DEGenderText.X, nounTableHandler.getGenusByIndex(4));
}
Expand Down
Loading

0 comments on commit 744c7f7

Please sign in to comment.