Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support divorces, add marriage information #15

Merged
merged 4 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 29 additions & 29 deletions src/main/java/com/neo4j/data/importer/GedcomImporter.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package com.neo4j.data.importer;

import com.neo4j.data.importer.Lists.Pair;
import com.joestelmach.natty.Parser;
import com.neo4j.data.importer.extractors.FamilyExtractors;
import com.neo4j.data.importer.extractors.PersonExtractors;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.folg.gedcom.model.Gedcom;
import org.folg.gedcom.model.SpouseRef;
import org.folg.gedcom.parser.ModelParser;
import org.neo4j.common.DependencyResolver;
import org.neo4j.configuration.Config;
Expand Down Expand Up @@ -38,7 +37,8 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
var filePath = rebuildPath(file);
var model = loadModel(filePath);

var personExtractors = new PersonExtractors(model);
var dateParser = new Parser();
var personExtractors = new PersonExtractors(dateParser, model);
var statistics = new Statistics();
try (Transaction tx = db.beginTx()) {
model.getPeople().forEach(person -> {
Expand All @@ -49,33 +49,33 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
statistics.addNodesCreated(personsStats.getNodesCreated());
});

var familyExtractors = new FamilyExtractors(dateParser);
model.getFamilies().forEach(family -> {
List<String> spouseReferences1 =
family.getHusbandRefs().stream().map(SpouseRef::getRef).toList();
List<String> spouseReferences2 =
family.getWifeRefs().stream().map(SpouseRef::getRef).toList();
List<Pair<String, String>> couples = Lists.crossProduct(spouseReferences1, spouseReferences2);
List<String> childrenReferences =
family.getChildRefs().stream().map(SpouseRef::getRef).toList();
couples.forEach(couple -> {
var stats = tx.execute(
"""
MATCH (spouse1:Person {id: $spouseId1}), (spouse2:Person {id: $spouseId2})
CREATE (spouse1)-[:IS_MARRIED_TO]->(spouse2)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:IS_CHILD_OF]->(spouse1)
CREATE (child)-[:IS_CHILD_OF]->(spouse2)
""",
Map.of(
"spouseId1", couple.left(),
"spouseId2", couple.right(),
"childIds", childrenReferences))
.getQueryStatistics();
var attributes = familyExtractors.get().apply(family);
var stats = tx.execute(
"""
UNWIND $spouseIdPairs AS spouseInfo
MATCH (spouse1:Person {id: spouseInfo.id1}),
(spouse2:Person {id: spouseInfo.id2})
CREATE (spouse1)-[r:IS_SPOUSE_OF]->(spouse2)
FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
CREATE (spouse1)-[r:IS_MARRIED_TO]->(spouse2)
SET r = marriageInfo
)
FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
CREATE (spouse1)-[r:DIVORCED]->(spouse2)
SET r = divorceInfo
)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:IS_CHILD_OF]->(spouse1)
CREATE (child)-[:IS_CHILD_OF]->(spouse2)
""",
attributes)
.getQueryStatistics();

statistics.addRelationshipsCreated(stats.getRelationshipsCreated());
});
statistics.addRelationshipsCreated(stats.getRelationshipsCreated());
});

tx.commit();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import com.neo4j.data.importer.extractors.Lists.Pair;
import java.util.List;
import java.util.Map;
import org.folg.gedcom.model.Family;
import org.folg.gedcom.model.SpouseRef;

/**
 * Default {@link FamilyExtractor}: reads spouse references, child references and
 * family events straight from the GEDCOM {@link Family} record.
 */
class DefaultFamilyExtractor implements FamilyExtractor {

    private final Parser dateParser;

    /**
     * @param dateParser parser handed to {@link EventFacts} when family events are extracted
     */
    DefaultFamilyExtractor(Parser dateParser) {
        this.dateParser = dateParser;
    }

    /**
     * Pairs the family's husband references with its wife references using
     * {@code Lists.crossProduct}.
     */
    @Override
    public List<Pair<String, String>> spouseReferences(Family family) {
        return Lists.crossProduct(referencedIds(family.getHusbandRefs()), referencedIds(family.getWifeRefs()));
    }

    /**
     * Returns the family's events as produced by {@code EventFacts.extract}.
     */
    @Override
    public Map<String, List<Map<String, Object>>> familyEvents(Family family) {
        var events = family.getEventsFacts();
        return EventFacts.extract(events, dateParser);
    }

    /**
     * Returns the references of the family's children.
     */
    @Override
    public List<String> childReferences(Family family) {
        return referencedIds(family.getChildRefs());
    }

    /** Maps each reference object to the reference string it carries. */
    private static List<String> referencedIds(List<? extends SpouseRef> refs) {
        return refs.stream().map(SpouseRef::getRef).toList();
    }
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -19,8 +16,8 @@ class DefaultPersonExtractor implements PersonExtractor {

private final Parser dateParser;

public DefaultPersonExtractor() {
this.dateParser = new Parser();
public DefaultPersonExtractor(Parser dateParser) {
this.dateParser = dateParser;
}

@Override
Expand Down Expand Up @@ -50,42 +47,7 @@ public Optional<String> gender(Person person) {

@Override
public Map<String, Object> facts(Person person) {
Map<String, Object> attributes = new HashMap<>();
person.getEventsFacts().forEach(eventFact -> {
String factName = eventFact.getDisplayType().toLowerCase(Locale.ROOT);
String date = eventFact.getDate();
if (date != null) {
attributes.put(String.format("raw_%s_date", factName), date);
var localDate = parseLocalDate(date);
if (localDate != null) {
attributes.put(String.format("%s_date", factName), localDate);
}
}

String place = eventFact.getPlace();
if (place != null) {
attributes.put(factName + "_" + "location", place);
}
});
return attributes;
}

private LocalDate parseLocalDate(String date) {
var parse = dateParser.parse(date);
if (parse.size() != 1) {
return null;
}

var dateGroup = parse.get(0);
if (dateGroup.getDates().size() != 1 || dateGroup.isDateInferred()) {
// Dates should be parsed explicitly from input.
// Inferred dates are likely to be set using current time and therefore incorrect.
return null;
}

var parsedDate = dateGroup.getDates().get(0);

return LocalDate.ofInstant(parsedDate.toInstant(), ZoneId.systemDefault());
return EventFacts.extractFlat(person.getEventsFacts(), dateParser);
}

private static List<String> extractNames(Person person, Function<Name, String> nameFn) {
Expand Down
88 changes: 88 additions & 0 deletions src/main/java/com/neo4j/data/importer/extractors/EventFacts.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import org.folg.gedcom.model.EventFact;

/**
 * Static helpers that turn GEDCOM {@link EventFact}s into plain attribute maps.
 *
 * <p>For every event, up to four attributes are produced: its type, its raw date string,
 * its parsed date (only when the raw date can be parsed unambiguously) and its location.
 */
final class EventFacts {

    private EventFacts() {
        // Static helpers only; not meant to be instantiated.
    }

    /**
     * Extracts all events' type, date and location into a single, "flat" map.
     *
     * <p>Every key is qualified with the lower-cased display type of its event followed by
     * an underscore. Events are processed in list order, so when two events produce the same
     * key the later one overwrites the earlier one.
     *
     * @param facts events to read
     * @param dateParser parser used to turn raw date strings into {@link LocalDate}s
     * @return a new mutable map holding the attributes of all events
     */
    public static Map<String, Object> extractFlat(List<EventFact> facts, Parser dateParser) {
        var attributes = new HashMap<String, Object>();
        for (EventFact fact : facts) {
            attributes.putAll(extractFact(
                    fact,
                    dateParser,
                    (eventFact) ->
                            String.format("%s_", eventFact.getDisplayType().toLowerCase(Locale.ROOT))));
        }
        return attributes;
    }

    /**
     * Extracts all events' type, date and location, categorized by upper-cased event tag.
     *
     * <p>Unlike {@link #extractFlat}, keys are not qualified and every event keeps its own
     * attribute map, so several events with the same tag are all preserved.
     *
     * @param facts events to read
     * @param dateParser parser used to turn raw date strings into {@link LocalDate}s
     * @return a new mutable map from upper-cased tag to the attribute maps of its events,
     *     in list order
     */
    public static Map<String, List<Map<String, Object>>> extract(List<EventFact> facts, Parser dateParser) {
        var attributes = new HashMap<String, List<Map<String, Object>>>();
        for (EventFact fact : facts) {
            var eventsPerTag =
                    attributes.computeIfAbsent(fact.getTag().toUpperCase(Locale.ROOT), (key) -> new ArrayList<>());
            eventsPerTag.add(extractFact(fact, dateParser));
        }
        return attributes;
    }

    /** Extracts a single event's attributes with unqualified keys. */
    private static Map<String, Object> extractFact(EventFact eventFact, Parser dateParser) {
        return extractFact(eventFact, dateParser, (fact) -> "");
    }

    /**
     * Extracts a single event's attributes.
     *
     * @param fact event to read
     * @param dateParser parser used for the event's date
     * @param keyQualifierFn computes the qualifier inserted into every key of this event
     * @return a new map containing only the attributes the event actually defines
     */
    private static Map<String, Object> extractFact(
            EventFact fact, Parser dateParser, Function<EventFact, String> keyQualifierFn) {
        // Up to four entries (type, raw date, parsed date, location): default sizing is enough.
        var attributes = new HashMap<String, Object>();
        String date = fact.getDate();
        String keyQualifier = keyQualifierFn.apply(fact);
        String type = fact.getType();
        if (type != null) {
            attributes.put(String.format("%stype", keyQualifier), type);
        }
        if (date != null) {
            // The raw string is always kept, even when it cannot be parsed.
            attributes.put(String.format("raw_%sdate", keyQualifier), date);
            var localDate = parseLocalDate(dateParser, date);
            if (localDate != null) {
                attributes.put(String.format("%sdate", keyQualifier), localDate);
            }
        }
        String place = fact.getPlace();
        if (place != null) {
            attributes.put(String.format("%slocation", keyQualifier), place);
        }
        return attributes;
    }

    /**
     * Parses a raw date string into a {@link LocalDate} in the system default time zone.
     *
     * @return the parsed date, or {@code null} when the input does not yield exactly one
     *     explicit date
     */
    private static LocalDate parseLocalDate(Parser dateParser, String date) {
        var parse = dateParser.parse(date);
        if (parse.size() != 1) {
            return null;
        }

        var dateGroup = parse.get(0);
        if (dateGroup.getDates().size() != 1 || dateGroup.isDateInferred()) {
            // Dates should be parsed explicitly from input.
            // Inferred dates are likely to be set using current time and therefore incorrect.
            return null;
        }

        var parsedDate = dateGroup.getDates().get(0);

        return LocalDate.ofInstant(parsedDate.toInstant(), ZoneId.systemDefault());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.extractors.Lists.Pair;
import java.util.List;
import java.util.Map;
import org.folg.gedcom.model.Family;

/**
 * Extracts the attributes of a GEDCOM {@link Family}: its couples, its events and its children.
 */
public interface FamilyExtractor extends AttributeExtractor<Family> {

    /** Returns the spouse reference pairs of the given family. */
    List<Pair<String, String>> spouseReferences(Family family);

    /** Returns the family's events as lists of attribute maps grouped under string keys. */
    Map<String, List<Map<String, Object>>> familyEvents(Family family);

    /** Returns the references of the family's children. */
    List<String> childReferences(Family family);

    /**
     * Assembles the family's parameter map.
     *
     * <p>The result has two entries: {@code "spouseIdPairs"}, a list with one map per couple
     * (keys {@code "id1"}, {@code "id2"} and {@code "events"}), and {@code "childIds"}, the
     * child references. Every couple receives the same family events.
     */
    default Map<String, Object> apply(Family family) {
        Map<String, List<Map<String, Object>>> events = familyEvents(family);
        List<Map<String, Object>> couples = spouseReferences(family).stream()
                .map(pair -> coupleParameters(pair, events))
                .toList();
        return Map.of("spouseIdPairs", couples, "childIds", childReferences(family));
    }

    /** Builds the parameter entry describing a single couple. */
    private static Map<String, Object> coupleParameters(
            Pair<String, String> pair, Map<String, List<Map<String, Object>>> events) {
        return Map.of(
                "id1", pair.left(),
                "id2", pair.right(),
                "events", events);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.util.function.Supplier;
import org.folg.gedcom.model.Family;

/**
 * Supplies the {@link AttributeExtractor} to use for {@link Family} records.
 */
public class FamilyExtractors implements Supplier<AttributeExtractor<Family>> {

    private final Parser dateParser;

    /**
     * @param dateParser date parser passed on to every extractor this supplier creates
     */
    public FamilyExtractors(Parser dateParser) {
        this.dateParser = dateParser;
    }

    /**
     * Creates a new {@code DefaultFamilyExtractor} backed by this supplier's date parser.
     */
    @Override
    public AttributeExtractor<Family> get() {
        AttributeExtractor<Family> extractor = new DefaultFamilyExtractor(dateParser);
        return extractor;
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.neo4j.data.importer;
package com.neo4j.data.importer.extractors;

import java.util.List;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
package com.neo4j.data.importer.extractors;

import com.joestelmach.natty.Parser;
import java.util.Locale;
import java.util.function.Supplier;
import org.folg.gedcom.model.Gedcom;
import org.folg.gedcom.model.Person;

public class PersonExtractors implements Supplier<AttributeExtractor<Person>> {

private final Parser dateParser;
private final String generatorName;

public PersonExtractors(Gedcom model) {
public PersonExtractors(Parser dateParser, Gedcom model) {
this.dateParser = dateParser;
this.generatorName = model.getHeader().getGenerator().getName().toLowerCase(Locale.ROOT);
}

@Override
public AttributeExtractor<Person> get() {
var defaultExtractor = new DefaultPersonExtractor();
var defaultExtractor = new DefaultPersonExtractor(dateParser);
if ("heredis pc".equals(generatorName)) {
return new HeredisPersonExtractor(defaultExtractor);
}
Expand Down
Loading