Skip to content

Commit

Permalink
Merge pull request #50 from nlnwa/concurrency
Browse files Browse the repository at this point in the history
New API
  • Loading branch information
maeb authored Oct 8, 2021
2 parents cc02e5b + 0c59b27 commit 866c7ca
Show file tree
Hide file tree
Showing 24 changed files with 725 additions and 705 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
<maven.compiler.target>11</maven.compiler.target>
<docker.tag>${project.version}</docker.tag>

<veidemann.api.version>1.0.0-beta23</veidemann.api.version>
<veidemann.api.version>1.0.0-beta24</veidemann.api.version>
<veidemann.commons.version>v0.6.0</veidemann.commons.version>
<veidemann.rethinkdbadapter.version>0.8.0</veidemann.rethinkdbadapter.version>

Expand Down Expand Up @@ -91,7 +91,7 @@
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>3.6.0</version>
<version>3.7.0</version>
</dependency>

<dependency>
Expand All @@ -115,7 +115,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.1.1-jre</version>
<version>31.0.1-jre</version>
</dependency>

<!-- Configuration framework -->
Expand Down
85 changes: 13 additions & 72 deletions src/main/java/no/nb/nna/veidemann/frontier/api/Context.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
package no.nb.nna.veidemann.frontier.api;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.grpc.stub.ServerCallStreamObserver;
import no.nb.nna.veidemann.frontier.db.CrawlQueueManager;
import no.nb.nna.veidemann.frontier.worker.Frontier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
Expand All @@ -21,30 +16,17 @@ public class Context {
private static final Logger LOG = LoggerFactory.getLogger(Context.class);
private final AtomicBoolean isShutdown;
private final AtomicInteger amountOfActiveObserversCounter;
private final AtomicInteger amountOfActivePageFetchesCounter;
private final Frontier frontier;

static final Lock lock = new ReentrantLock();
static final Condition notTerminated = lock.newCondition();

static final ScheduledExecutorService timoutThread = Executors.newScheduledThreadPool(
1, new ThreadFactoryBuilder().setNameFormat("fetch-timeout-%d").build());

public Context(Frontier frontier) {
isShutdown = new AtomicBoolean(false);
amountOfActiveObserversCounter = new AtomicInteger(0);
amountOfActivePageFetchesCounter = new AtomicInteger(0);
this.frontier = frontier;
}

public RequestContext newRequestContext(ServerCallStreamObserver responseObserver) {
return new RequestContext(frontier, responseObserver);
}

public boolean isCancelled() {
return isShutdown.get();
}

public void shutdown() {
isShutdown.set(true);
if (amountOfActiveObserversCounter.get() <= 0) {
Expand Down Expand Up @@ -95,66 +77,25 @@ public Frontier getFrontier() {
return frontier;
}

public int getActivePageFetchCount() {
return amountOfActivePageFetchesCounter.get();
}

public CrawlQueueManager getCrawlQueueManager() {
return frontier.getCrawlQueueManager();
}

public class RequestContext extends Context {
private final ServerCallStreamObserver responseObserver;
private final AtomicBoolean observerCompleted = new AtomicBoolean(false);
private final AtomicBoolean pageFetchStarted = new AtomicBoolean(false);
private final AtomicBoolean fetchReturned = new AtomicBoolean(false);
private ScheduledFuture<Void> timeout;

private RequestContext(Frontier frontier, ServerCallStreamObserver responseObserver) {
super(frontier);
this.responseObserver = responseObserver;
amountOfActiveObserversCounter.incrementAndGet();
LOG.trace("Client connected. Currently active clients: {}", amountOfActiveObserversCounter.get());
}

public boolean isCancelled() {
return isShutdown.get() || responseObserver.isCancelled();
}

public ServerCallStreamObserver getResponseObserver() {
return responseObserver;
}

public void startPageFetch() {
if (pageFetchStarted.compareAndSet(false, true)) {
amountOfActivePageFetchesCounter.incrementAndGet();
LOG.trace("Page fetch started. Currently active page fetches: {}", amountOfActivePageFetchesCounter.get());
}
}

public void setObserverCompleted() {
if (observerCompleted.compareAndSet(false, true)) {
if (pageFetchStarted.get()) {
amountOfActivePageFetchesCounter.decrementAndGet();
}
if (amountOfActiveObserversCounter.decrementAndGet() <= 0 && isShutdown.get()) {
lock.lock();
try {
notTerminated.signalAll();
} finally {
lock.unlock();
}
}
LOG.trace("Client disconnected. Currently active clients: {}. Currently active page fetches: {}",
amountOfActiveObserversCounter.get(), amountOfActivePageFetchesCounter.get());
}
}
public void startPageComplete() {
amountOfActiveObserversCounter.incrementAndGet();
LOG.trace("Client connected. Currently active clients: {}", amountOfActiveObserversCounter.get());
}

public void setFetchCompleted() {
if (timeout != null) {
// Stop the timeout cancel handler
timeout.cancel(false);
public void setObserverCompleted() {
if (amountOfActiveObserversCounter.decrementAndGet() <= 0 && isShutdown.get()) {
lock.lock();
try {
notTerminated.signalAll();
} finally {
lock.unlock();
}
}
LOG.trace("Client disconnected. Currently active clients: {}.",
amountOfActiveObserversCounter.get());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ public FrontierApiServer(ServerBuilder<?> serverBuilder, Frontier frontier) {
ConcurrencyLimitServerInterceptor.newBuilder(
new GrpcServerLimiterBuilder()
.partitionByMethod()
.partition(FrontierGrpc.getCrawlSeedMethod().getFullMethodName(), 0.38)
.partition(FrontierGrpc.getGetNextPageMethod().getFullMethodName(), 0.02)
.partition(FrontierGrpc.getCrawlSeedMethod().getFullMethodName(), 0.30)
.partition(FrontierGrpc.getGetNextPageMethod().getFullMethodName(), 0.0001)
.partition(FrontierGrpc.getPageCompletedMethod().getFullMethodName(), 0.0999)
.partition(FrontierGrpc.getBusyCrawlHostGroupCountMethod().getFullMethodName(), 0.15)
.partition(FrontierGrpc.getQueueCountForCrawlExecutionMethod().getFullMethodName(), 0.15)
.partition(FrontierGrpc.getQueueCountForCrawlHostGroupMethod().getFullMethodName(), 0.15)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,13 @@ public void crawlSeed(CrawlSeedRequest request, StreamObserver<CrawlExecutionId>
}

@Override
public StreamObserver<PageHarvest> getNextPage(StreamObserver<PageHarvestSpec> responseObserver) {
return new GetNextPageHandler(ctx.newRequestContext((ServerCallStreamObserver) responseObserver));
public void getNextPage(Empty request, StreamObserver<PageHarvestSpec> responseObserver) {
GetNextPageHandler.onNext(ctx, responseObserver);
}

@Override
public StreamObserver<PageHarvest> pageCompleted(StreamObserver<Empty> responseObserver) {
return new PageCompletedHandler(ctx, (ServerCallStreamObserver) responseObserver);
}

@Override
Expand Down
Loading

0 comments on commit 866c7ca

Please sign in to comment.