From 1b9df0e9aa8934387c3068d5556e3a0fceefd247 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Wed, 3 Jan 2024 18:46:51 +0800 Subject: [PATCH] selected-link custom modification this can be used as an experimental worker version with a custom modification of r5type select-link adresses #913 --- .../java/com/conveyal/gtfs/GTFSCache.java | 8 +- src/main/java/com/conveyal/gtfs/GTFSFeed.java | 14 +- .../java/com/conveyal/gtfs/GeometryCache.java | 2 +- .../java/com/conveyal/r5/analyst/Grid.java | 4 +- .../r5/analyst/cluster/PathResult.java | 50 ++++ .../r5/analyst/cluster/SelectedLink.java | 278 ++++++++++++++++++ .../cluster/TransportNetworkConfig.java | 2 +- .../scenario/ModificationTypeResolver.java | 1 + .../r5/analyst/scenario/SelectLink.java | 179 +++++++++++ .../r5/analyst/scenario/package-info.java | 5 + .../com/conveyal/r5/common/GeometryUtils.java | 33 +++ .../com/conveyal/r5/transit/TransitLayer.java | 204 ++++++++----- .../conveyal/r5/transit/TransportNetwork.java | 10 + .../r5/transit/TransportNetworkCache.java | 35 ++- .../com/conveyal/r5/transit/TripPattern.java | 2 + .../r5/transit/path/PatternSequence.java | 8 +- .../r5/transit/path/StopSequence.java | 1 + 17 files changed, 749 insertions(+), 87 deletions(-) create mode 100644 src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java create mode 100644 src/main/java/com/conveyal/r5/analyst/scenario/SelectLink.java diff --git a/src/main/java/com/conveyal/gtfs/GTFSCache.java b/src/main/java/com/conveyal/gtfs/GTFSCache.java index 6a5b27cbd..35ed22cd4 100644 --- a/src/main/java/com/conveyal/gtfs/GTFSCache.java +++ b/src/main/java/com/conveyal/gtfs/GTFSCache.java @@ -48,10 +48,10 @@ public class GTFSCache implements Component { // The following two caches hold spatial indexes of GTFS geometries for generating Mapbox vector tiles, one spatial // index per feed keyed on BundleScopedFeedId. 
They could potentially be combined such that cache values are a // compound type holding two indexes, or cache values are a single index containing a mix of different geometry - // types that are filtered on iteration. They could also be integreated into the GTFSFeed values of the main - // GTFSCache#cache. However GTFSFeed is already a very long class, and we may want to tune eviction parameters + // types that are filtered on iteration. They could also be integrated into the GTFSFeed values of the main + // GTFSCache#cache. However, GTFSFeed is already a very long class, and we may want to tune eviction parameters // separately for GTFSFeed and these indexes. While GTFSFeeds are expected to incur constant memory use, the - // spatial indexes are potentially unlimited in size and we may want to evict them faster or limit their quantity. + // spatial indexes are potentially unlimited in size, so we may want to evict them faster or limit their quantity. // We have decided to keep them as separate caches until we're certain of the chosen eviction tuning parameters. /** A cache of spatial indexes of TripPattern shapes, keyed on the BundleScopedFeedId. */ @@ -127,6 +127,8 @@ public FileStorageKey getFileKey (String id, String extension) { // The feedId of the GTFSFeed objects may not be unique - we can have multiple versions of the same feed // covering different time periods, uploaded by different users. Therefore we record another ID here that is // known to be unique across the whole application - the ID used to fetch the feed. + // NOTE as of 2023, this is no longer true. All uploaded feeds have assigned unique UUIDs so as far as I know + // they can't collide, we don't need this uniqueId field, and we may not even need bundle-scoped feed IDs. 
feed.uniqueId = id; return feed; } diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java index 83236ad01..e001b42ce 100644 --- a/src/main/java/com/conveyal/gtfs/GTFSFeed.java +++ b/src/main/java/com/conveyal/gtfs/GTFSFeed.java @@ -85,16 +85,18 @@ public class GTFSFeed implements Cloneable, Closeable { /** The MapDB database handling persistence of Maps to a pair of disk files behind the scenes. */ private DB db; - /** An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions. */ + /** + * An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions. + * In R5 as of 2023 this is always overwritten with a unique UUID to avoid problems with successive feed versions + * or edited/modified versions of the same feeds. + */ public String feedId; /** - * This field was merged in from the wrapper FeedSource. It is a unique identifier for this particular GTFS file. - * Successive versions of the data for the same operators, or even different copies of the same operator's data - * uploaded by different people, should have different uniqueIds. - * In practice this is mostly copied into WrappedGTFSEntity instances used in the Analysis GraphQL API. + * In R5 as of 2023, this field will contain the bundle-scoped feed ID used to fetch the feed object from the + * GTFSCache (but is not present on disk or before saving - only after it's been reloaded from a file by the cache). */ - public transient String uniqueId; // set this to feedId until it is overwritten, to match FeedSource behavior + public transient String uniqueId; // All tables below should be MapDB maps so the entire GTFSFeed is persistent and uses constant memory. 
diff --git a/src/main/java/com/conveyal/gtfs/GeometryCache.java b/src/main/java/com/conveyal/gtfs/GeometryCache.java index 96fa31635..642b8a46e 100644 --- a/src/main/java/com/conveyal/gtfs/GeometryCache.java +++ b/src/main/java/com/conveyal/gtfs/GeometryCache.java @@ -19,7 +19,7 @@ * LoadingCache so should be thread safe and provide granular per-key locking, which is convenient when serving up * lots of simultaneous vector tile requests. * - * This is currently used only for looking up geomertries when producing Mapbox vector map tiles, hence the single + * This is currently used only for looking up geometries when producing Mapbox vector map tiles, hence the single * set of hard-wired cache eviction parameters. For more general use we'd want another constructor to change them. */ public class GeometryCache { diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java index ad9faf1ed..957d4235d 100644 --- a/src/main/java/com/conveyal/r5/analyst/Grid.java +++ b/src/main/java/com/conveyal/r5/analyst/Grid.java @@ -53,6 +53,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -170,7 +171,8 @@ public List getPixelWeights (Geometry geometry, boolean relativeToP double area = geometry.getArea(); if (area < 1e-12) { - throw new IllegalArgumentException("Feature geometry is too small"); + LOG.warn("Discarding feature. 
Its area is too small to serve as a denominator ({} square degrees).", area); + return Collections.EMPTY_LIST; } if (area > MAX_FEATURE_AREA_SQ_DEG) { diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index c2366ac1a..0446cab65 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -2,6 +2,7 @@ import com.conveyal.r5.analyst.StreetTimesAndModes; import com.conveyal.r5.transit.TransitLayer; +import com.conveyal.r5.transit.TripPattern; import com.conveyal.r5.transit.path.Path; import com.conveyal.r5.transit.path.PatternSequence; import com.conveyal.r5.transit.path.RouteSequence; @@ -9,9 +10,16 @@ import com.google.common.collect.Multimap; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; +import gnu.trove.set.TIntSet; +import gnu.trove.set.hash.TIntHashSet; import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.awt.*; +import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.List; @@ -32,6 +40,8 @@ public class PathResult { + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + /** * The maximum number of destinations for which we'll generate detailed path information in a single request. * Detailed path information was added on to the original design, which returned a simple grid of travel times. @@ -41,12 +51,14 @@ public class PathResult { public static final int MAX_PATH_DESTINATIONS = 5_000; private final int nDestinations; + /** * Array with one entry per destination. Each entry is a map from a "path template" to the associated iteration * details. For now, the path template is a route-based path ignoring per-iteration details such as wait time. 
* With additional changes, patterns could be collapsed further to route combinations or modes. */ public final Multimap[] iterationsForPathTemplates; + private final TransitLayer transitLayer; public static final String[] DATA_COLUMNS = new String[]{ @@ -83,6 +95,15 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { * pattern-based keys */ public void setTarget(int targetIndex, Multimap patterns) { + + // When selected link analysis is enabled, filter down the PatternSequence-Iteration Multimap to retain only + // those keys passing through the selected links. + // TODO Maybe selectedLink should be on TransitLayer, and somehow indicate the number of removed iterations. + if (transitLayer.parentNetwork.selectedLink != null) { + patterns = transitLayer.parentNetwork.selectedLink.filterPatterns(patterns); + } + + // The rest of this runs independent of whether a SelectedLink filtered down the patterns-iterations map. Multimap routes = HashMultimap.create(); patterns.forEach(((patternSeq, iteration) -> routes.put(new RouteSequence(patternSeq, transitLayer), iteration))); iterationsForPathTemplates[targetIndex] = routes; @@ -103,6 +124,35 @@ public ArrayList[] summarizeIterations(Stat stat) { summary[d] = new ArrayList<>(); Multimap iterationMap = iterationsForPathTemplates[d]; if (iterationMap != null) { + // SelectedLink case: collapse all RouteSequences and Iterations for this OD pair into one to simplify. + // This could also be done by merging all Iterations under a single RouteSequence with all route IDs. 
+ if (transitLayer.parentNetwork.selectedLink != null) { + int nIterations = 0; + TIntSet allRouteIds = new TIntHashSet(); + double summedTotalTime = 0; + for (RouteSequence routeSequence: iterationMap.keySet()) { + Collection iterations = iterationMap.get(routeSequence); + nIterations += iterations.size(); + allRouteIds.addAll(routeSequence.routes); + summedTotalTime += iterations.stream().mapToInt(i -> i.totalTime).sum(); + } + // Many destinations will have no iterations at all passing through the SelectedLink area. + // Skip those to keep the CSV output short. + if (nIterations > 0) { + String[] row = new String[DATA_COLUMNS.length]; + Arrays.fill(row, "ALL"); + String allRouteIdsPipeSeparated = Arrays.stream(allRouteIds.toArray()) + .mapToObj(transitLayer.routes::get) + .map(routeInfo -> routeInfo.route_id) + .collect(Collectors.joining("|")); + row[0] = allRouteIdsPipeSeparated; + row[row.length - 1] = Integer.toString(nIterations); + row[row.length - 2] = String.format("%.1f", summedTotalTime / nIterations / 60d); // Average total time + summary[d].add(row); + } + continue; + } + // Standard (non SelectedLink) case. 
for (RouteSequence routeSequence: iterationMap.keySet()) { Collection iterations = iterationMap.get(routeSequence); int nIterations = iterations.size(); diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java b/src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java new file mode 100644 index 000000000..4cbae63bb --- /dev/null +++ b/src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java @@ -0,0 +1,278 @@ +package com.conveyal.r5.analyst.cluster; + +import com.conveyal.r5.analyst.cluster.PathResult.Iteration; +import com.conveyal.r5.transit.TransitLayer; +import com.conveyal.r5.transit.TransportNetworkCache; +import com.conveyal.r5.transit.TripPattern; +import com.conveyal.r5.transit.path.PatternSequence; +import com.conveyal.r5.util.TIntIntHashMultimap; +import com.conveyal.r5.util.TIntIntMultimap; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import gnu.trove.TIntCollection; +import gnu.trove.map.TIntObjectMap; +import gnu.trove.set.TIntSet; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.LineString; +import org.locationtech.jts.geom.Polygon; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import static com.conveyal.r5.common.GeometryUtils.envelopeForCircle; +import static com.conveyal.r5.common.GeometryUtils.polygonForEnvelope; +import static com.google.common.base.Preconditions.checkState; + +/* + +Implementation considerations: + +- Shapes are one of the biggest parts of GTFS feeds. +- TransitLayer can associate shapes with each TripPattern and extract the sub-shapes between each stop. +- However this functionality is hard-wired to be disabled (with a constant SAVE_SHAPES) during network build. +- This means all existing TransportNetworks do not have shapes attached to their TripPatterns.
+- Enabling this across the board is expected to make all TransitLayers significantly larger. + +- Every single path generated needs to be subject to filtering. +- Geographic intersections can be quite slow. +- The geographic intersections need to be precomputed and looked up quickly during routing. + +- The SAVE_SHAPES section of TransitLayer building is relatively complicated. +- It involves projecting stops onto shapes and splitting them, and must handle cases where shapes are missing. +- We don't want to replicate this existing logic elsewhere. + +GtfsController accesses GeometryCache in GtfsCache.patternShapes, but this just saves entire exemplar trip geometries, +not inter-stop segments. TripPattern.getHopGeometries looks relatively simple using LocationIndexedLine, but depends on +some fairly complicated stop-to-shape snapping logic in the SAVE_SHAPES section of TransitLayer to pre-build fields. +We could either re-run this code after the fact to inject the shapes into existing network files, or we could enable it +with a network build time switch. We need to turn on shape storage in the TripPatterns, or otherwise iterate through +all of them in a streaming fashion to record every one that passes through the bounding box. + +TripPattern does make the assumption that all trips on the same pattern have the same geometry (or can be reasonably +represented with the same geometry drawn from one of the trips). + +In existing serialized TransitLayers, TripPattern.getHopGeometries usually returns straight lines because +TripPattern.shape is null (it is hard-wired to not save shapes in TransitLayers). However, the GTFS MapDBs do still +contain the shapes for each trip. This is how we show them in GtfsController making VectorMapTiles. We already have +spatial index capabilities at gtfsCache.patternShapes.queryEnvelope(bundleScopedFeedId, tile.envelope). See L206-209 of +GtfsController.
However, this does not retain enough information about the segments between stops in the patterns, and +uses a lot of space for all those geometries. + +Networks are always built and scenarios always applied on workers. Workers do have access to GTFSFeed files. +WorkerComponents has a TransportNetworkCache which is injected into the AnalysisWorker constructor. This is the only +path to access a GtfsCache, and that GtfsCache is private, so we need methods on TransportNetworkCache. The full path +to this GtfsCache is: AnalysisWorker.networkPreloader.transportNetworkCache.gtfsCache. + +The best way to prototype the intended behavior is to create a new modification type. This provides a mechanism for +attaching things to a network, at a point where we may still have access to the GTFS feeds. It also ensures that the +network with this extra information is properly cached for similar subsequent requests (as in a regional analysis). +We can't attach the precomputed selected link information to the raw base TransportNetwork, because then the first +request for that network would always need to be one with the selected-link behavior specified. Networks are expected +to be read-only once loaded, and anyway subsequent requests for the same network hit a cache and don't pass through +the right place to access the GTFSCache and update the in-memory network. We need to be able to apply it later to a +network that was already loaded without the selected link specified. We could treat the network as mutable and write to +it, but this does not follow existing design and would require mentally modeling how to manipulate the system to get +the desired effect. + +TransportNetworkCache#getNetworkForScenario is where we always apply scenarios in the worker, and that class has direct +access to the GtfsCache.
+ +Deciding whether to create SelectedLink via a Modification or a per-request parameter: + +The SelectedLink instance (fast checking whether paths pass through an area) needs to be stored/referenced: +- Somewhere that is reachable from inside PathResult.setTarget or PathResult.summarizeIterations +- Somewhere that is correctly scoped to where the selected-link filtering is specified (request/task or scenario) +- Somewhere that is writable in the places where we have access to the gtfsCache +- Somewhere that is PERSISTENT across requests - this is inherently the case for TransportNetwork but for Task we'd + need to introduce another cache. The problem being that the base TransportNetwork's scope is too wide (could be + used in requests with or without the SelectedLink), so it needs to be a modification on a specific scenario. + +The PathResult constructor is passed a Task and a TransitLayer. It retains only the TransitLayer but could retain both. +In AnalysisWorker.handleAndSerializeOneSinglePointTask we still have the task context, but deeper on the stack in +networkPreloader and then transportNetworkCache (which has gtfsCache), we have the scenario but not the task. But +then once you go deeper into applying the individual scenario modifications, the gtfsCache is no longer visible. + +SelectedLink doesn't feel like a modification. It feels like a parameter to the CSV path output in the task. +The AnalysisWorker could have a Map from SelectionBox to SelectedLink, but then the keys are full of floating-point +coordinate numbers, which requires fuzzy matching on these keys to look up the precomputed data. This could get very +ugly. + +We also need to tie pre-existing items in the TransportNetwork (TripPatterns) to new items from the GTFS. It feels like +this should be on a scenario copy of a TransportNetwork.
It's ugly, but it would be possible to scan over the incoming +modifications and inject the GtfsCache (or pre-selected GTFSFeeds) onto a transient field of any SelectedLink +Modification present. + +Getting this information into the network resulting from applying a Scenario makes it auto-retained, gives it a stable +identity so we don't need to fuzzy-match it in the task to cache. That could also be done by uploading a geometry file +with an ID, but that's so much indirection for a single small polygon. In the future it would make sense to treat all +lat/lon as effectively integers (fixed-point) since it simplifies this kind of keying and matching. + +Alternatively we could enable the storage of GTFS route shapes on the network file when it's built. Then the +modification could be applied normally without injecting a GtfsCache or GtfsFeeds. But again that bloats the size of +every network just for the odd case where someone wants to do selected link analysis. + +Bundle Scoping problem: + +The feed IDs expected by gtfsCache (i.e. gtfs file names) are bundle-scoped but the ones in the TripPatterns are not. +TransportNetworks and TransitLayers apparently do not retain their bundle ID. In any case they can have multiple feeds +originally uploaded with different bundles. TransitLayer.feedChecksums keys are the same feed IDs prefixing +TripPattern.routeId, which are the gtfsFeed.feedId, which is not bundle-scoped so can't be used to get a feed from +gtfsCache. + +A network is always based on one bundle with the same ID, but the bundle config can also reference GTFS with a +different bundle scope (originally uploaded for another bundle). So knowing the network ID is not sufficient to find +a GTFS feed from its un-scoped UUID. + +Based on GtfsController.bundleScopedFeedIdFromRequest, the bundleScopedFeedId is feedId_feedGroupId. They're no longer +based on the bundle/network ID, but the feed group.
It seems like we wouldn't need these scopes at all since all feeds +now have unique IDs. Removing them could cause a lot of disruption though. + +When we make the TransportNetwork from these bundles, it's always on a worker, using information from the bundle's +TransportNetworkConfig JSON file. This is in TransportNetworkCache.buildNetworkFromConfig(). At first it looks like +the bundleScopedId is completely lost after we go through the loading process. GtfsCache.get(String id) does store +that key id in feed.uniqueId, but that field is never read (or written) anywhere else. + +This means the bundle ID is available during network creation to be retained in the TransportNetwork, but they aren't +retained. I think the only place we can get these bundle scoped feed IDs is from the TransportNetworkConfig JSON file. +Perhaps that should be serialized into the TransportNetwork itself (check the risk of serializing applied Modifications). +But in the meantime TNCache has a method to load that configuration and get the bundle scopes. + +The distinction between stop indexes at the TransitLayer level and stop indexes (positions) within the TripPattern is +critical and can cause some comparisons to fail silently, while superficially appearing to do something meaningful. +The lightweight newtype pattern would be really useful here but doesn't exist in Java. It is not practical to change +the source path information we receive such that it stores stop indexes within the TripPattern and defers resolution +to TransitLayer indexes and names. Looking at the Path constructor and RaptorState, we don't have this information as +RaptorStates are stored in array slots indexed on stop indexes from the TransitLayer (not the TripPattern level ones). + + */ + +/** + * This class is used in performing "selected link" analysis (R5 issue #913). 
It retains a precomputed collection + * containing every segment of every TripPattern that passes through a certain polygon, and provides methods for quickly + * checking whether any leg of a public transit trip overlaps these precomputed segments. + */ +public class SelectedLink { + + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** + * Contains all TripPattern inter-stop hops that pass through the selected link area for fast hash-based lookup. + * Keys are the index of a TripPattern in the TransitLayer, and values are arrays of stop positions within that + * TripPattern (NOT the stop index within the TransitLayer). A hop from stop position N to stop position N+1 on + * pattern at index X is recorded as the mapping X -> N. There may be several such hops within a single pattern, + * thus an array with more than one element, in the case where one or more transit stops along the pattern fall + * within the SelectedLink search radius. + */ + private final TIntObjectMap hopsInTripPattern; + + /** + * The TransitLayer relative to which all TripPattern indexes and stop indexes should be interpreted. + * This is the TransitLayer of the TransportNetwork that holds this SelectedLink instance. + * It should be treated as strictly read-only. + * Applying further scenarios could perhaps cause the two references to diverge, but the information here in the + * base TransitLayer should remain fixed and valid for interpreting hopsInTripPattern. + */ + private final TransitLayer transitLayer; + + public SelectedLink(TransitLayer transitLayer, TIntObjectMap hopsInTripPattern) { + this.transitLayer = transitLayer; + this.hopsInTripPattern = hopsInTripPattern; + } + + /** + * For a given transit ride from a board stop to an alight stop on a TripPattern, return true if that ride + * passes through any of the hops in this SelectedLink area, or false if it's entirely outside the area. 
+ * This is complicated by the fact that the board and alight stops are TransitLayer-wide stop indexes, + * not the position of the stop within the pattern. It is possible (though unlikely) that a board and alight + * stop pair could ambiguously refer to more than one sub-segment of the same pattern when one of the stops appears + * more than once in the pattern's stop sequence. We find the earliest matching sub-segment in the sequence. + */ + private boolean includes (int pattern, int board, int alight) { + int[] hops = hopsInTripPattern.get(pattern); + // Short-circuit: bail out early from most comparisons when the trip pattern has no hops in the SelectedLink. + if (hops == null) { + return false; + } + // Less common case: one or more hops in the pattern of this transit leg do fall inside this SelectedLink. + // Determine at which positions in the pattern the board and alight stops are located. Begin looking for + // the alight position after the board position, imposing order constraints and reducing potential for + // ambiguity where stops appear more than once in the same pattern. + int boardPos = stopPositionInPattern(pattern, board, 0); + int alightPos = stopPositionInPattern(pattern, alight, boardPos + 1); + for (int hop : hops) { + // Hops are identified with the stop position at their beginning so the alight comparison is exclusive: + // a leg alighting at a stop does not ride over the hop identified with that stop position. + if (boardPos <= hop && alightPos > hop) { + return true; + } + } + return false; + } + + /** + * Translate a stop index within the TransitLayer to a stop position within the TripPattern with the given index. 
+ */ + private int stopPositionInPattern (int patternIndex, int stopIndexInTransitLayer, int startingAtPos) { + TripPattern tripPattern = transitLayer.tripPatterns.get(patternIndex); + for (int s = startingAtPos; s < tripPattern.stops.length; s++) { + if (tripPattern.stops[s] == stopIndexInTransitLayer) { + return s; + } + } + String message = String.format("Did not find stop %d in pattern %d", stopIndexInTransitLayer, patternIndex); + throw new IllegalArgumentException(message); + } + + /** + * Check whether the given PatternSequence has at least one transit leg that passes through this SelectedLink area. + */ + private boolean traversedBy (PatternSequence patternSequence) { + // Why are some patterns TIntLists null? Are these walk-only routes with no transit legs? + if (patternSequence.patterns == null) { + return false; + } + // Iterate over the three parallel arrays containing TripPattern, board stop, and alight stop indexes. + for (int ride = 0; ride < patternSequence.patterns.size(); ride++) { + int pattern = patternSequence.patterns.get(ride); + int board = patternSequence.stopSequence.boardStops.get(ride); + int alight = patternSequence.stopSequence.alightStops.get(ride); + if (this.includes(pattern, board, alight)) { + // logTriple(pattern, board, alight); + return true; + } + } + return false; + } + + /** + * This filters a particular type of Multimap used in PathResult and TravelTimeReducer.recordPathsForTarget(). + * For a single origin-destination pair, it captures all transit itineraries connecting that origin and destination. + * The keys represent sequences of transit rides between specific stops (TripPattern, board stop, alight stop). + * The values associated with each key represent individual raptor iterations that used that sequence of rides, + * each of which may have a different departure time, wait time, and total travel time. 
This method returns a + * filtered COPY of the supplied Multimap, with all mappings removed for keys that do not pass through this + * SelectedLink area. This often yields an empty Multimap, greatly reducing the number of rows in the CSV output. + */ + public Multimap filterPatterns (Multimap patterns) { + Multimap filteredPatterns = HashMultimap.create(); + for (PatternSequence patternSequence : patterns.keySet()) { + if (this.traversedBy(patternSequence)) { + Collection iterations = patterns.get(patternSequence); + filteredPatterns.putAll(patternSequence, iterations); + } + } + return filteredPatterns; + } + + private void logTriple (int pattern, int boardStop, int alightStop) { + String routeId = transitLayer.tripPatterns.get(pattern).routeId; + String boardStopName = transitLayer.stopNames.get(boardStop); + String alightStopName = transitLayer.stopNames.get(alightStop); + LOG.info("Route {} from {} to {}", routeId, boardStopName, alightStopName); + } +} diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java b/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java index 012e3204a..be7a1e6e5 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java @@ -32,7 +32,7 @@ public class TransportNetworkConfig { /** ID of the OSM file, for use with OSMCache */ public String osmId; - /** IDs of the GTFS files, for use with GTFSCache */ + /** IDs of the GTFS files, for use with GTFSCache. These are "bundle-scoped" in the form feedId_feedGroupId. */ public List gtfsIds; /** The fare calculator for analysis, if any. TODO this is not yet wired up to TransportNetwork.setFareCalculator. 
*/ diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/ModificationTypeResolver.java b/src/main/java/com/conveyal/r5/analyst/scenario/ModificationTypeResolver.java index c4b95696e..599ca92bb 100644 --- a/src/main/java/com/conveyal/r5/analyst/scenario/ModificationTypeResolver.java +++ b/src/main/java/com/conveyal/r5/analyst/scenario/ModificationTypeResolver.java @@ -49,6 +49,7 @@ public class ModificationTypeResolver extends TypeIdResolverBase { .put("raster-cost", RasterCost.class) .put("shapefile-lts", ShapefileLts.class) .put("set-fare-calculator", SetFareCalculator.class) + .put("select-link", SelectLink.class) .build(); @Override diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/SelectLink.java b/src/main/java/com/conveyal/r5/analyst/scenario/SelectLink.java new file mode 100644 index 000000000..c00b1b696 --- /dev/null +++ b/src/main/java/com/conveyal/r5/analyst/scenario/SelectLink.java @@ -0,0 +1,179 @@ +package com.conveyal.r5.analyst.scenario; + +import com.conveyal.gtfs.GTFSFeed; +import com.conveyal.r5.analyst.cluster.SelectedLink; +import com.conveyal.r5.transit.RouteInfo; +import com.conveyal.r5.transit.TransitLayer; +import com.conveyal.r5.transit.TransportNetwork; +import com.conveyal.r5.transit.TripPattern; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.TIntObjectMap; +import gnu.trove.map.hash.TIntObjectHashMap; +import org.locationtech.jts.geom.LineString; +import org.locationtech.jts.geom.Polygon; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.conveyal.r5.common.GeometryUtils.envelopeForCircle; +import static com.conveyal.r5.common.GeometryUtils.polygonForEnvelope; +import static com.google.common.base.Strings.isNullOrEmpty; + +/** + * This custom Modification restricts CSV path output to only include transit passing through a 
specified rectangle. + * This allows cutting down the size of the output considerably, consolidating results in a way that's useful for some + * network assignment and congestion problems. The parameters lon, lat, and radiusMeters define a selection box. + * @see SelectedLink + */ +public class SelectLink extends Modification { + + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /// Public fields supplied on the custom modification. + + public double lon; + + public double lat; + + public double radiusMeters; + + /// Private derived fields used in subsequent calculations. + + private Polygon boxPolygon; + + private Map feedForUnscopedId; + + @Override + public boolean resolve(TransportNetwork network) { + // Convert the incoming description of the selected link area to a Geometry for computing intersections. + boxPolygon = polygonForEnvelope(envelopeForCircle(lon, lat, radiusMeters)); + // Iterate over all TripPatterns in the TransitLayer and ensure that we can associate a feed with each one. + // These feeds must have been previously supplied with the injectGtfs() method. The feed IDs recorded in the + // TripPatterns are not bundle-scoped. Check that a feed with a correctly de-scoped ID was supplied for every + // TripPattern in this TransitLayer. If injectGtfs() was never called, each lookup is reported as an error. + for (TripPattern tripPattern : network.transitLayer.tripPatterns) { + String feedId = feedIdForTripPattern(tripPattern); + if (isNullOrEmpty(feedId)) { + errors.add("Could not find feed ID prefix in route ID " + tripPattern.routeId); + continue; + } + GTFSFeed feed = (feedForUnscopedId == null) ? null : feedForUnscopedId.get(feedId); + if (feed == null) { + errors.add("Could not find feed for ID " + feedId); + } + } + return errors.size() > 0; + } + + @Override + public boolean apply(TransportNetwork network) { + // This method is basically serving as a factory method for a SelectedLink instance.
Those instances are + // immutable, so need some kind of external factory or builder to construct them incrementally. + TIntObjectMap hopsInTripPattern = new TIntObjectHashMap<>(); + + // During raptor search, paths are recorded in terms of pattern and stop index numbers rather than + // TripPattern and Stop instance references, so iterate over the numbers. + for (int patternIndex = 0; patternIndex < network.transitLayer.tripPatterns.size(); patternIndex++) { + TripPattern tripPattern = network.transitLayer.tripPatterns.get(patternIndex); + // Make a sacrificial protective copy to avoid interfering with other requests using this network. + // We're going to add shape data to this TripPattern then throw it away immediately afterward. + // Be careful not to use a reference to this clone as a key in any Maps, it will not match TransitLayer. + tripPattern = tripPattern.clone(); + String feedId = feedIdForTripPattern(tripPattern); + GTFSFeed feed = feedForUnscopedId.get(feedId); + TransitLayer.addShapeToTripPattern(feed, tripPattern); + // TransitLayer parameter enables fetching straight lines between stops in case shapes are not present. + List hopGeometries = tripPattern.getHopGeometries(network.transitLayer); + TIntArrayList intersectedHops = new TIntArrayList(); + for (int hop = 0; hop < hopGeometries.size(); hop++) { + LineString hopGeometry = hopGeometries.get(hop); + if (boxPolygon.intersects(hopGeometry)) { + intersectedHops.add(hop); + } + } + if (!intersectedHops.isEmpty()) { + hopsInTripPattern.put(patternIndex, intersectedHops.toArray()); + } + } + + // After finding all links (TripPattern hops) in the SelectedLink area, release the GTFSFeeds which don't really + // belong in a Modification. This avoids memory leaks, and protects us from inadvertently relying on or + // modifying those feed objects later. + feedForUnscopedId = null; + + // To confirm expected behavior, record all selected links in Modification.info for the user, and log to console. 
+ LOG.info("Selected links for CSV path output:"); + hopsInTripPattern.forEachEntry((int patternIndex, int[] stopPositions) -> { + TripPattern tripPattern = network.transitLayer.tripPatterns.get(patternIndex); + RouteInfo routeInfo = network.transitLayer.routes.get(tripPattern.routeIndex); + String stopNames = Arrays.stream(stopPositions) + .map(s -> tripPattern.stops[s]) + .mapToObj(network.transitLayer.stopNames::get) + .collect(Collectors.joining(", ")); + String message = String.format("Route %s direction %s after stop %s", routeInfo.getName(), tripPattern.directionId, stopNames); + addInfo(message); + LOG.info(message); + return true; + }); + + // Store the resulting precomputed information in a SelectedLink instance on the TransportNetwork. + // This could also be on the TransitLayer, but we may eventually want to include street edges in SelectedLink. + network.selectedLink = new SelectedLink(network.transitLayer, hopsInTripPattern); + return errors.size() > 0; + } + + // By returning false for both affects methods, we make a very shallow copy of the TransitNetwork for efficiency. + + @Override + public boolean affectsStreetLayer() { + return false; + } + + @Override + public boolean affectsTransitLayer() { + return false; + } + + @Override + public int getSortOrder() { + // This modification needs to be applied after any modifications affecting the transit network. + // It appears this method is never called, maybe because sort order from CustomModificationHolder is used. + return 80; + } + + /** + * Currently we do not include the GTFS shapes in the TransportNetwork, so in order to determine which routes pass + * through a given geographic area, we need access to the original GTFS data. When resolving and applying + * Modifications, the only thing available is the TransportNetwork itself. This method is used to supply any needed + * GtfsFeeds keyed on their non-bundle-scoped, Conveyal-assigned feed UUID. 
+ * A TransportNetwork may be made from a bundle with multiple feeds, so we can't attach just one GTFSFeed. + * The TransportNetwork does not directly retain information on which feeds were used to create it, but each + * TripPattern retains a feedId as a prefix to its routeId in the format feedUUID:route_id. + * However, those feed IDs lack the bundle scope (feed group ID) needed to get feeds from GTFSCache. + * This all deviates significantly from pre-existing design, but does work and reveals some important + * considerations for any future revisions of the scenario application or network building systems. + */ + public void injectGtfs(Map feedForUnscopedId) { + this.feedForUnscopedId = feedForUnscopedId; + } + + /** + * The TransportNetwork does not directly retain information on which feeds were used to create it, but each of its + * TripPatterns retains a feed-scoped routeId in this format: String.format("%s:%s", gtfs.feedId, route.route_id). + * This feed ID is not bundle-scoped as expected by GTFSCache when loading feeds, so it requires additional mapping. + */ + private String feedIdForTripPattern (TripPattern tripPattern) { + String[] parts = tripPattern.routeId.split(":"); + if (parts.length == 0) { + return null; + } else { + return parts[0]; + } + } + +} diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/package-info.java b/src/main/java/com/conveyal/r5/analyst/scenario/package-info.java index 3b6b1c9f9..00a68c747 100644 --- a/src/main/java/com/conveyal/r5/analyst/scenario/package-info.java +++ b/src/main/java/com/conveyal/r5/analyst/scenario/package-info.java @@ -2,5 +2,10 @@ * This package contains classes for modeling transport scenarios as an ordered series of modifications to be applied * to an underlying baseline graph. It is used for impact analysis: the interactive creation and comparison of the * accessibility effects of modifications to a transport network.
+ * + * It is important to note that each of these classes has a corresponding model for use in the UI and database. + * Each type of modification has an R5 version (which is more stable over time) and a UI/DB version which can be + * changed more freely. Conversion to the R5 types in this package is performed by an implementation of + * com.conveyal.analysis.models.Modification.toR5(). */ package com.conveyal.r5.analyst.scenario; \ No newline at end of file diff --git a/src/main/java/com/conveyal/r5/common/GeometryUtils.java b/src/main/java/com/conveyal/r5/common/GeometryUtils.java index c41eef536..42cfd2665 100644 --- a/src/main/java/com/conveyal/r5/common/GeometryUtils.java +++ b/src/main/java/com/conveyal/r5/common/GeometryUtils.java @@ -8,6 +8,7 @@ import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.GeometryFactory; import org.locationtech.jts.geom.LineSegment; +import org.locationtech.jts.geom.Polygon; import static com.conveyal.r5.streets.VertexStore.fixedDegreesToFloating; import static com.conveyal.r5.streets.VertexStore.floatingDegreesToFixed; @@ -16,6 +17,9 @@ /** * Reimplementation of OTP GeometryUtils, using copied code where there are not licensing concerns. * Also contains reusable methods for validating WGS84 envelopes and latitude and longitude values. + * + * FIXME we have two geometry util classes com.conveyal.r5.common.GeometryUtils and com.conveyal.gtfs.util.GeometryUtil + * Each has its own GeometryFactory. */ public class GeometryUtils { public static final GeometryFactory geometryFactory = new GeometryFactory(); @@ -172,4 +176,33 @@ thingBeingChecked, roughWgsEnvelopeArea(envelope), MAX_BOUNDING_BOX_AREA_SQ_KM } } + /** + * Given floating point WGS84 latitude and longitude and a radius in meters, create an envelope enclosing that + * circle.
+ */ + public static Envelope envelopeForCircle (double lon, double lat, double radiusMeters) { + checkLat(lat); + checkLon(lon); + checkArgument(radiusMeters > 0 && radiusMeters < 1000, "Radius must be positive and less than 1km."); // A non-positive radius would collapse or null out the envelope. + Envelope envelope = new Envelope(); + envelope.expandToInclude(lon, lat); + double latExpansion = SphericalDistanceLibrary.metersToDegreesLatitude(radiusMeters); + double lonExpansion = SphericalDistanceLibrary.metersToDegreesLongitude(radiusMeters, lat); + envelope.expandBy(lonExpansion, latExpansion); + return envelope; + } + + /** + * Some geometry operations such as intersections and inclusion can only operate on Geometries, not Envelopes. + */ + public static Polygon polygonForEnvelope (Envelope env) { + return geometryFactory.createPolygon(new Coordinate[] { + new Coordinate(env.getMinX(), env.getMinY()), + new Coordinate(env.getMinX(), env.getMaxY()), + new Coordinate(env.getMaxX(), env.getMaxY()), + new Coordinate(env.getMaxX(), env.getMinY()), + new Coordinate(env.getMinX(), env.getMinY()) + }); + } + } diff --git a/src/main/java/com/conveyal/r5/transit/TransitLayer.java b/src/main/java/com/conveyal/r5/transit/TransitLayer.java index 871491ff2..47bda5945 100644 --- a/src/main/java/com/conveyal/r5/transit/TransitLayer.java +++ b/src/main/java/com/conveyal/r5/transit/TransitLayer.java @@ -63,6 +63,10 @@ public class TransitLayer implements Serializable, Cloneable { /** Maximum distance to record in distance tables, in meters. */ public static final int WALK_DISTANCE_LIMIT_METERS = 2000; + /** + * If this is true, the detailed shapes from GTFS will be retained in the TransitLayer. + * If false, straight-line shapes will be used between stops. + */ public static final boolean SAVE_SHAPES = false; /** @@ -230,7 +234,7 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx LOG.info("Creating trip patterns and schedules."); - // These are temporary maps used only for grouping purposes.
+ // These are temporary maps used only for grouping purposes within this one GTFS feed, not all feeds in a bundle. Map tripPatternForPatternId = new HashMap<>(); Multimap tripsForBlock = HashMultimap.create(); @@ -242,7 +246,6 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx Trip trip = gtfs.trips.get(tripId); Route route = gtfs.routes.get(trip.route_id); // Construct the stop pattern and schedule for this trip. - String scopedRouteId = String.join(":", gtfs.feedId, trip.route_id); TIntList arrivals = new TIntArrayList(TYPICAL_NUMBER_OF_STOPS_PER_TRIP); TIntList departures = new TIntArrayList(TYPICAL_NUMBER_OF_STOPS_PER_TRIP); TIntList stopSequences = new TIntArrayList(TYPICAL_NUMBER_OF_STOPS_PER_TRIP); @@ -270,9 +273,10 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx continue TRIPS; } - if (previousDeparture == st.arrival_time) { //Teleportation: arrive at downstream stop immediately after departing upstream - //often the result of a stop_times input with time values rounded to the nearest minute. - //TODO check if the distance of the hop is reasonably traveled in less than 60 seconds, which may vary by mode. + if (previousDeparture == st.arrival_time) { + // Teleportation: arrive at downstream stop immediately after departing upstream + // often the result of a stop_times input with time values rounded to the nearest minute. + // TODO check if the distance of the hop is reasonably traveled in less than 60 seconds, which may vary by mode. nZeroDurationHops++; } @@ -288,79 +292,24 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx String patternId = gtfs.patternForTrip.get(tripId); + // Fetch or make the internal R5 TripPattern for the given gtfs-lib Pattern. + // Note that gtfs-lib Pattern IDs are UUIDs, so should be unique even across different feeds. + // The first time we encounter a trip with a given Pattern ID, we make the TripPattern. 
+ // Certain characteristics of the pattern are derived from that Trip (notably the Shape) but could in + // theory be different on each trip in the pattern. In this edge case, encounter order of trips matters. TripPattern tripPattern = tripPatternForPatternId.get(patternId); if (tripPattern == null) { tripPattern = new TripPattern(String.format("%s:%s", gtfs.feedId, route.route_id), stopTimes, indexForUnscopedStopId); - - // if we haven't seen the route yet _from this feed_ (as IDs are only feed-unique) - // create it. if (level == LoadLevel.FULL) { + // If we haven't seen the route yet _from this feed_ (as IDs are only feed-unique) create it. if (!routeIndexForRoute.containsKey(trip.route_id)) { int routeIndex = routes.size(); RouteInfo ri = new RouteInfo(route, gtfs.agency.get(route.agency_id)); routes.add(ri); routeIndexForRoute.put(trip.route_id, routeIndex); } - tripPattern.routeIndex = routeIndexForRoute.get(trip.route_id); - - if (trip.shape_id != null && SAVE_SHAPES) { - Shape shape = gtfs.getShape(trip.shape_id); - if (shape == null) LOG.warn("Shape {} for trip {} was missing", trip.shape_id, trip.trip_id); - else { - // TODO this will not work if some trips in the pattern don't have shapes - tripPattern.shape = shape.geometry; - - // project stops onto shape - boolean stopsHaveShapeDistTraveled = StreamSupport.stream(stopTimes.spliterator(), false) - .noneMatch(st -> Double.isNaN(st.shape_dist_traveled)); - boolean shapePointsHaveDistTraveled = DoubleStream.of(shape.shape_dist_traveled) - .noneMatch(Double::isNaN); - - LinearLocation[] locations; - - if (stopsHaveShapeDistTraveled && shapePointsHaveDistTraveled) { - // create linear locations from dist traveled - locations = StreamSupport.stream(stopTimes.spliterator(), false) - .map(st -> { - double dist = st.shape_dist_traveled; - - int segment = 0; - - while (segment < shape.shape_dist_traveled.length - 2 && - dist > shape.shape_dist_traveled[segment + 1] - ) segment++; - - double endSegment = 
shape.shape_dist_traveled[segment + 1]; - double beginSegment = shape.shape_dist_traveled[segment]; - double proportion = (dist - beginSegment) / (endSegment - beginSegment); - - return new LinearLocation(segment, proportion); - }).toArray(LinearLocation[]::new); - } else { - // naive snapping - LocationIndexedLineInLocalCoordinateSystem line = - new LocationIndexedLineInLocalCoordinateSystem(shape.geometry.getCoordinates()); - - locations = StreamSupport.stream(stopTimes.spliterator(), false) - .map(st -> { - Stop stop = gtfs.stops.get(st.stop_id); - return line.project(new Coordinate(stop.stop_lon, stop.stop_lat)); - }) - .toArray(LinearLocation[]::new); - } - - tripPattern.stopShapeSegment = new int[locations.length]; - tripPattern.stopShapeFraction = new float[locations.length]; - - for (int i = 0; i < locations.length; i++) { - tripPattern.stopShapeSegment[i] = locations[i].getSegmentIndex(); - tripPattern.stopShapeFraction[i] = (float) locations[i].getSegmentFraction(); - } - } - } } - tripPatternForPatternId.put(patternId, tripPattern); tripPattern.originalId = tripPatterns.size(); tripPatterns.add(tripPattern); @@ -387,11 +336,21 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx } LOG.info("Done creating {} trips on {} patterns.", nTripsAdded, tripPatternForPatternId.size()); + // Store shapes and associated linear locations of stops on the set of patterns created by this one GTFS feed. + // Instead, we usually perform this one pattern at a time on demand, only when needed by modifications. 
+ if (SAVE_SHAPES) { + LOG.info("Referencing stop locations to shapes and breaking shapes into per-hop segments..."); + for (TripPattern tripPattern : tripPatternForPatternId.values()) { + addShapeToTripPattern(gtfs, tripPattern); + } + LOG.info("Done processing shapes."); + } + LOG.info("{} zero-duration hops found.", nZeroDurationHops); LOG.info("Chaining trips together according to blocks to model interlining..."); // Chain together trips served by the same vehicle that allow transfers by simply staying on board. - // Elsewhere this is done by grouping by (serviceId, blockId) but this is not supported by the spec. + // This is done elsewhere by grouping by (serviceId, blockId) but this is not supported by the spec. // Discussion started on gtfs-changes. tripsForBlock.asMap().forEach((blockId, trips) -> { TripSchedule[] schedules = trips.toArray(new TripSchedule[trips.size()]); @@ -411,9 +370,8 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx LOG.info("Finding the approximate center of the transport network..."); findCenter(gtfs.stops.values()); - //Set transportNetwork timezone - //If there are no agencies (which is strange) it is GMT - //Otherwise it is set to first valid agency timezone and warning is shown if agencies have different timezones + // Set TransportNetwork timezone. If there are no agencies (which is strange) it defaults to GMT. + // Otherwise, it is set to first valid agency timezone. Warning is shown if agencies have different timezones. if (gtfs.agency.size() == 0) { timeZone = ZoneId.of("GMT"); LOG.warn("graph contains no agencies; API request times will be interpreted as GMT."); @@ -468,6 +426,112 @@ public void loadFromGtfs (GTFSFeed gtfs, LoadLevel level) throws DuplicateFeedEx } + /** + * This code has been factored out of loadFromGtfs because it adds a lot of data to the TransportNetwork. 
We usually + * don't run it when creating the TransportNetwork, but instead run it as needed on single TripPatterns to save + * space. Factoring pieces out of loadFromGtfs also makes that method a bit more readable as it's very long. + * TODO test how much this actually increases feed size and consider enabling it when creating TransportNetworks. + * This could also be changed to return a compound type of the (shape, stopShapeSegment, and stopShapeFraction) + * fields of TripPattern, or only an array or list of hop geometries (if we don't ever want to persist them). + */ + public static void addShapeToTripPattern ( + GTFSFeed gtfsFeed, + TripPattern tripPattern + ) { + // First, find an exemplar trip that is representative of the TripPattern. + boolean foundExemplarTrip = false; + Trip trip = null; + Iterable stopTimes = null; + for (TripSchedule tripSchedule : tripPattern.tripSchedules) { + // In constructor, TripSchedule.tripId = String.join(":", trip.feed_id, trip.trip_id); split at the first colon only, since GTFS trip_ids may themselves contain colons. + String[] tripIdParts = tripSchedule.tripId.split(":", 2); + if (tripIdParts.length < 2 || !tripIdParts[0].equals(gtfsFeed.feedId)) { + LOG.warn("Feed ID scope of trip ID for TripSchedule in TripPattern does not match supplied GTFS feed."); + continue; + } + String unscopedTripId = tripIdParts[1]; + trip = gtfsFeed.trips.get(unscopedTripId); + if (trip == null) { + LOG.warn("Could not find trip for unscoped ID " + unscopedTripId); + continue; + } + if (trip.shape_id == null) { + continue; + } + try { + stopTimes = gtfsFeed.getInterpolatedStopTimesForTrip(unscopedTripId); + } catch (GTFSFeed.FirstAndLastStopsDoNotHaveTimes e) { + continue; + } + // All checks succeeded, record the information from this trip as the exemplar for the pattern. + foundExemplarTrip = true; + break; + } + // Could add a possibly slow check: get each trip, check if exemplarTrip.shapeId equals shape in each trip. + // LOG.warn(String.format("Multiple trips in the same TripPattern have different shapes (e.g.
%s and %s)") + + if (!foundExemplarTrip) { + LOG.warn("Did not find any exemplar trip with usable Shape and StopTimes for pattern " + tripPattern); + return; + } + // This is assigned outside the loop only to make it final, as required by lambdas below. + final Shape shape = gtfsFeed.getShape(trip.shape_id); + if (shape == null) { + LOG.error("Shape {} for trip {} was missing", trip.shape_id, trip.trip_id); + return; + } + tripPattern.shape = shape.geometry; + + // Project stop locations onto the shape geometry. + + boolean stopsHaveShapeDistTraveled = StreamSupport.stream(stopTimes.spliterator(), false) + .noneMatch(st -> Double.isNaN(st.shape_dist_traveled)); + boolean shapePointsHaveDistTraveled = DoubleStream.of(shape.shape_dist_traveled) + .noneMatch(Double::isNaN); + + LinearLocation[] locations; + + if (stopsHaveShapeDistTraveled && shapePointsHaveDistTraveled) { + // Create linear locations from the distance traveled along the shape. + final Shape finalShape = shape; + locations = StreamSupport.stream(stopTimes.spliterator(), false) + .map(st -> { + double dist = st.shape_dist_traveled; + + int segment = 0; + + while (segment < finalShape.shape_dist_traveled.length - 2 && + dist > finalShape.shape_dist_traveled[segment + 1] + ) segment++; + + double endSegment = finalShape.shape_dist_traveled[segment + 1]; + double beginSegment = finalShape.shape_dist_traveled[segment]; + double proportion = (dist - beginSegment) / (endSegment - beginSegment); + + return new LinearLocation(segment, proportion); + }).toArray(LinearLocation[]::new); + } else { + // naive snapping + LocationIndexedLineInLocalCoordinateSystem line = + new LocationIndexedLineInLocalCoordinateSystem(shape.geometry.getCoordinates()); + + locations = StreamSupport.stream(stopTimes.spliterator(), false) + .map(st -> { + Stop stop = gtfsFeed.stops.get(st.stop_id); + return line.project(new Coordinate(stop.stop_lon, stop.stop_lat)); + }) + .toArray(LinearLocation[]::new); + } + + 
tripPattern.stopShapeSegment = new int[locations.length]; + tripPattern.stopShapeFraction = new float[locations.length]; + + for (int i = 0; i < locations.length; i++) { + tripPattern.stopShapeSegment[i] = locations[i].getSegmentIndex(); + tripPattern.stopShapeFraction[i] = (float) locations[i].getSegmentFraction(); + } + } + // The median of all stopTimes would be best but that involves sorting a huge list of numbers. // So we just use the mean of all stops for now. private void findCenter (Collection stops) { diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java index 3e3cb3720..ae7f5b028 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java @@ -4,6 +4,7 @@ import com.conveyal.osmlib.OSM; import com.conveyal.r5.analyst.LinkageCache; import com.conveyal.r5.analyst.WebMercatorGridPointSet; +import com.conveyal.r5.analyst.cluster.SelectedLink; import com.conveyal.r5.analyst.error.TaskError; import com.conveyal.r5.analyst.fare.InRoutingFareCalculator; import com.conveyal.r5.analyst.scenario.Scenario; @@ -91,6 +92,15 @@ public class TransportNetwork implements Serializable { /** Information about the effects of apparently correct scenario application, null on a base network */ public transient List scenarioApplicationInfo; + /** + * If non-null, CSV path outputs will be filtered down to only include paths passing through this one specific area. + * This is not really a characteristic of the network, it's more similar to request-scoped parameters like + * AnalysisRequest.recordPaths. However, it requires some data structures that are slow to build, keyed on + * potentially fuzzy floating point geometries, and need to be retained across many requests, so it's modeled as a + * scenario modification. This is not ideal but it works. This modification can be applied at network build time.
+ */ + public transient SelectedLink selectedLink; + /** * Build some simple derived index tables that are not serialized with the network. * Distance tables and street spatial indexes are now serialized with the network. diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index b7f1f1d8f..14fadf4f8 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -6,11 +6,13 @@ import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; import com.conveyal.gtfs.GTFSCache; +import com.conveyal.gtfs.GTFSFeed; import com.conveyal.r5.analyst.cluster.ScenarioCache; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.scenario.Modification; import com.conveyal.r5.analyst.scenario.RasterCost; import com.conveyal.r5.analyst.scenario.Scenario; +import com.conveyal.r5.analyst.scenario.SelectLink; import com.conveyal.r5.analyst.scenario.ShapefileLts; import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.kryo.KryoNetworkSerializer; @@ -32,6 +34,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import java.util.zip.ZipEntry; @@ -133,6 +136,25 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, St LOG.debug("Applying scenario to base network..."); // Fetch the full scenario if an ID was specified. Scenario scenario = resolveScenario(networkId, scenarioId); + + // EXPERIMENTAL: Allow select-link modification type to read GTFSFeeds, to see all shape geometries. + for (Modification modification : scenario.modifications) { + if (modification instanceof SelectLink) { + // This seems to be the only way to see the original bundle-scoped feed IDs and reverse-map them. 
+ TransportNetworkConfig networkConfig = loadNetworkConfig(networkId); + Map feedForUnscopedId = new HashMap<>(); + for (String bundleScopedFeedId : networkConfig.gtfsIds) { + GTFSFeed feed = gtfsCache.get(bundleScopedFeedId); + String unscopedFeedId = feed.feedId; // The unscoped ID known inside the TripPatterns + GTFSFeed existingValue = feedForUnscopedId.put(unscopedFeedId, feed); + if (existingValue != null) { + LOG.warn("Feed ID collision when removing bundle/feedGroup scope from IDs."); + } + } + ((SelectLink) modification).injectGtfs(feedForUnscopedId); + } + } + // Apply any scenario modifications to the network before use, performing protective copies where necessary. // We used to prepend a filter to the scenario, removing trips that are not running during the search time window. // However, because we are caching transportNetworks with scenarios already applied to them, we can’t use @@ -159,7 +181,14 @@ private static FileStorageKey getR5NetworkFileStorageKey (String networkId) { return new FileStorageKey(BUNDLES, getR5NetworkFilename(networkId)); } - /** @return the network configuration (AKA manifest) for the given network ID, or null if no config file exists. */ + /** + * Each bundle has a corresponding JSON file listing its OSM and GTFS IDs among other things. Its base name is the + * UUID of the bundle, which is also the UUID of any derived TransportNetworks. + * There is a one-to-one correspondence between bundles and TransportNetworks: the ID of a TransportNetwork, as + * well as the base of its filename, are identical to the ID of its corresponding bundle of input data. + * TransportNetworks are always built on the workers, based on the contents of the JSON network config. + * @return the network configuration (AKA manifest) for the given network ID, or null if no config file exists. 
+ */ private TransportNetworkConfig loadNetworkConfig (String networkId) { FileStorageKey configFileKey = new FileStorageKey(BUNDLES, getNetworkConfigFilename(networkId)); if (!fileStorage.exists(configFileKey)) { @@ -167,7 +196,7 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { } File configFile = fileStorage.getFile(configFileKey); try { - // Use lenient mapper to mimic behavior in objectFromRequestBody. + // Use lenient mapper to mimic behavior in objectFromRequestBody. Method closes the file when complete. return JsonUtilities.lenientObjectMapper.readValue(configFile, TransportNetworkConfig.class); } catch (IOException e) { throw new RuntimeException("Error reading TransportNetworkConfig. Does it contain new unrecognized fields?", e); @@ -305,7 +334,7 @@ private TransportNetwork buildNetworkFromConfig (TransportNetworkConfig config) // Apply modifications embedded in the TransportNetworkConfig JSON final Set> ACCEPT_MODIFICATIONS = Set.of( - RasterCost.class, ShapefileLts.class + RasterCost.class, ShapefileLts.class, SelectLink.class ); if (config.modifications != null) { // Scenario scenario = new Scenario(); diff --git a/src/main/java/com/conveyal/r5/transit/TripPattern.java b/src/main/java/com/conveyal/r5/transit/TripPattern.java index 7c7e08224..8c9abe2d8 100644 --- a/src/main/java/com/conveyal/r5/transit/TripPattern.java +++ b/src/main/java/com/conveyal/r5/transit/TripPattern.java @@ -3,6 +3,7 @@ import com.conveyal.gtfs.model.StopTime; import com.conveyal.r5.common.GeometryUtils; import com.conveyal.r5.streets.VertexStore; +import com.google.common.collect.Lists; import gnu.trove.list.TIntList; import gnu.trove.map.TObjectIntMap; import org.locationtech.jts.geom.Coordinate; @@ -92,6 +93,7 @@ public TripPattern (TIntList intStopIds) { } public TripPattern(String routeId, Iterable stopTimes, TObjectIntMap indexForUnscopedStopId) { + // FIXME We don't need to explicitly use a spliterator to make a list. 
Lists.newArrayList(stopTimes); List stopTimeList = StreamSupport.stream(stopTimes.spliterator(), false).collect(Collectors.toList()); int nStops = stopTimeList.size(); stops = new int[nStops]; diff --git a/src/main/java/com/conveyal/r5/transit/path/PatternSequence.java b/src/main/java/com/conveyal/r5/transit/path/PatternSequence.java index 215def067..9cecd027f 100644 --- a/src/main/java/com/conveyal/r5/transit/path/PatternSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/PatternSequence.java @@ -6,11 +6,15 @@ import java.util.Objects; /** - * A door-to-door path that includes the patterns ridden between stops + * A door-to-door path made up of transit legs, each composed of a TripPattern, board and alight stop. + * There is also information on ride and wait times and access and egress modes. */ public class PatternSequence { - /** Pattern indexes (those used in R5 transit layer) for each transit leg */ + + /** Pattern indexes (those used in R5 TransitLayer) for each transit leg in the itinerary. */ public final TIntList patterns; + + /** Other information associated with each of the legs: board and alight stops, ride times etc. */ public final StopSequence stopSequence; /** diff --git a/src/main/java/com/conveyal/r5/transit/path/StopSequence.java b/src/main/java/com/conveyal/r5/transit/path/StopSequence.java index 9ad83105a..f79f273a3 100644 --- a/src/main/java/com/conveyal/r5/transit/path/StopSequence.java +++ b/src/main/java/com/conveyal/r5/transit/path/StopSequence.java @@ -11,6 +11,7 @@ /** * A door-to-door path, i.e. access/egress characteristics and transit legs (keyed on characteristics including per-leg * in-vehicle times but not specific trips/patterns/routes), which may be repeated at different departure times. + * The integer stop indexes here are the indexes within the TransitLayer, not within the TripPattern or Trip ridden. * * Instances are constructed initially from transit legs, with access and egress set in successive operations. */