From 1d823cace9be10389086795ed7357c7eaf7660c0 Mon Sep 17 00:00:00 2001 From: tsantalis Date: Thu, 10 Oct 2024 10:11:54 -0400 Subject: [PATCH] Disable scope in comment diff when there is control flow restructuring https://github.com/eclipse-jgit/jgit/commit/5d8a9f6f3 --- .../decomposition/UMLOperationBodyMapper.java | 42 +- .../test/TestJavadocDiff.java | 3 +- ...3ac43c6b1c48cdfad55e545171ea3-comments.txt | 14 + .../jgit/internal/storage/pack/DeltaTask.java | 318 +++ .../internal/storage/pack/DeltaWindow.java | 506 ++++ .../internal/storage/pack/PackWriter.java | 2452 ++++++++++++++++ ...a9f6f3f43ac43c6b1c48cdfad55e545171ea3.json | 1 + .../jgit/internal/storage/pack/DeltaTask.java | 332 +++ .../internal/storage/pack/DeltaWindow.java | 511 ++++ .../internal/storage/pack/PackWriter.java | 2475 +++++++++++++++++ 10 files changed, 6647 insertions(+), 7 deletions(-) create mode 100644 src/test/resources/mappings/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3-comments.txt create mode 100644 src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java create mode 100644 src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java create mode 100644 src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java create mode 100644 src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3.json create mode 100644 src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java create mode 100644 src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java create mode 100644 src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java diff --git a/src/main/java/gr/uom/java/xmi/decomposition/UMLOperationBodyMapper.java b/src/main/java/gr/uom/java/xmi/decomposition/UMLOperationBodyMapper.java index 146d1c153..ff2da994c 100644 --- a/src/main/java/gr/uom/java/xmi/decomposition/UMLOperationBodyMapper.java +++ b/src/main/java/gr/uom/java/xmi/decomposition/UMLOperationBodyMapper.java @@ -2654,11 +2654,26 @@ else if(streamAPIStatements1.size() > 0 && streamAPIStatements2.size() == 0) { AbstractCodeMapping parentMapping = findParentMappingContainingOperationInvocation(); Set deletedComments = new LinkedHashSet(); if(parentMapping != null) { - for(UMLComment deletedComment : parentMapper.commentListDiff.getDeletedComments()) { - if(parentMapping.getFragment1().getLocationInfo().subsumes(deletedComment.getLocationInfo())) { - deletedComments.add(deletedComment); + boolean containsReturn = false; + if(parentMapping.getFragment1() instanceof CompositeStatementObject) { + CompositeStatementObject comp = (CompositeStatementObject)parentMapping.getFragment1(); + for(AbstractCodeFragment fragment : comp.getLeaves()) { + if(fragment.getString().equals(JAVA.RETURN_STATEMENT)) { + containsReturn = true; + break; + } } } + if(!containsReturn) { + for(UMLComment deletedComment : parentMapper.commentListDiff.getDeletedComments()) { + if(parentMapping.getFragment1().getLocationInfo().subsumes(deletedComment.getLocationInfo())) { + 
deletedComments.add(deletedComment); + } + } + } + else { + deletedComments.addAll(parentMapper.commentListDiff.getDeletedComments()); + } } else { deletedComments.addAll(parentMapper.commentListDiff.getDeletedComments()); @@ -3054,11 +3069,26 @@ else if(streamAPIStatements1.size() > 0 && streamAPIStatements2.size() == 0) { AbstractCodeMapping parentMapping = findParentMappingContainingOperationInvocation(); Set addedComments = new LinkedHashSet(); if(parentMapping != null) { - for(UMLComment addedComment : parentMapper.commentListDiff.getAddedComments()) { - if(parentMapping.getFragment2().getLocationInfo().subsumes(addedComment.getLocationInfo())) { - addedComments.add(addedComment); + boolean containsReturn = false; + if(parentMapping.getFragment2() instanceof CompositeStatementObject) { + CompositeStatementObject comp = (CompositeStatementObject)parentMapping.getFragment2(); + for(AbstractCodeFragment fragment : comp.getLeaves()) { + if(fragment.getString().equals(JAVA.RETURN_STATEMENT)) { + containsReturn = true; + break; + } } } + if(!containsReturn) { + for(UMLComment addedComment : parentMapper.commentListDiff.getAddedComments()) { + if(parentMapping.getFragment2().getLocationInfo().subsumes(addedComment.getLocationInfo())) { + addedComments.add(addedComment); + } + } + } + else { + addedComments.addAll(parentMapper.commentListDiff.getAddedComments()); + } } else { addedComments.addAll(parentMapper.commentListDiff.getAddedComments()); diff --git a/src/test/java/org/refactoringminer/test/TestJavadocDiff.java b/src/test/java/org/refactoringminer/test/TestJavadocDiff.java index 47babd508..fa6a7e2f6 100644 --- a/src/test/java/org/refactoringminer/test/TestJavadocDiff.java +++ b/src/test/java/org/refactoringminer/test/TestJavadocDiff.java @@ -120,7 +120,8 @@ public void testMethodCommentMappings(String url, String commitId, String classN @CsvSource({ "https://github.com/jOOQ/jOOQ.git, 58a4e74d28073e7c6f15d1f225ac1c2fd9aa4357, org.jooq.tools.Convert.ConvertAll, jOOQ-58a4e74d28073e7c6f15d1f225ac1c2fd9aa4357-comments.txt", "https://github.com/thymeleaf/thymeleaf.git, 378ba37750a9cb1b19a6db434dfa59308f721ea6, org.thymeleaf.templateparser.reader.BlockAwareReader, thymeleaf-378ba37750a9cb1b19a6db434dfa59308f721ea6-comments.txt", - "https://github.com/eclipse-vertx/vert.x.git, 32a8c9086040fd6d6fa11a214570ee4f75a4301f, io.vertx.core.http.impl.HttpServerImpl.ServerHandler, vertx-32a8c9086040fd6d6fa11a214570ee4f75a4301f-comments.txt" + "https://github.com/eclipse-vertx/vert.x.git, 32a8c9086040fd6d6fa11a214570ee4f75a4301f, io.vertx.core.http.impl.HttpServerImpl.ServerHandler, vertx-32a8c9086040fd6d6fa11a214570ee4f75a4301f-comments.txt", + "https://github.com/eclipse-jgit/jgit.git, 5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3, org.eclipse.jgit.internal.storage.pack.PackWriter, jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3-comments.txt" }) public void testMethodCommentMultiMappings(String url, String commitId, String className, String testResultFileName) throws Exception { final List actual = new ArrayList<>(); diff --git a/src/test/resources/mappings/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3-comments.txt b/src/test/resources/mappings/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3-comments.txt new file mode 100644 index 000000000..e1c85a064 --- /dev/null +++ b/src/test/resources/mappings/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3-comments.txt @@ -0,0 +1,14 @@ +private searchForDeltas(monitor ProgressMonitor, list ObjectToPack[], cnt int) : void -> private parallelDeltaSearch(monitor 
ProgressMonitor, list ObjectToPack[], cnt int, threads int) : void +line range:1353-1353==line range:1375-1375 +line range:1354-1354==line range:1376-1376 +line range:1371-1371==line range:1393-1393 +line range:1372-1372==line range:1394-1394 +line range:1373-1373==line range:1395-1395 +line range:1388-1388==line range:1410-1410 +line range:1389-1389==line range:1411-1411 +line range:1390-1390==line range:1412-1412 +line range:1396-1396==line range:1418-1418 +line range:1397-1397==line range:1419-1419 +line range:1398-1398==line range:1420-1420 +line range:1350-1350==line range:1372-1372 +private searchForDeltas(monitor ProgressMonitor, list ObjectToPack[], cnt int) : void -> private searchForDeltas(monitor ProgressMonitor, list ObjectToPack[], cnt int) : void \ No newline at end of file diff --git a/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java new file mode 100644 index 000000000..ca2fff688 --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.internal.storage.pack; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.Callable; + +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; +import org.eclipse.jgit.storage.pack.PackConfig; + +final class DeltaTask implements Callable { + static final class Block { + private static final int MIN_TOP_PATH = 50 << 20; + + final List tasks; + final int threads; + final PackConfig config; + final ObjectReader templateReader; + final DeltaCache dc; + final ThreadSafeProgressMonitor pm; + final ObjectToPack[] list; + final int beginIndex; + final int endIndex; + + private long totalWeight; + + Block(int threads, PackConfig config, ObjectReader reader, + DeltaCache dc, ThreadSafeProgressMonitor pm, + ObjectToPack[] list, int begin, int end) { + this.tasks = new ArrayList(threads); + this.threads = threads; + this.config = config; + this.templateReader = reader; + this.dc = dc; + this.pm = pm; + this.list = list; + this.beginIndex = begin; + this.endIndex = end; + } + + synchronized DeltaWindow stealWork(DeltaTask forThread) { + for (;;) { + DeltaTask maxTask = null; + Slice maxSlice = null; + int maxWork = 0; + + for (DeltaTask task : tasks) { + Slice s = task.remaining(); + if (s != null && maxWork < s.size()) { + maxTask = task; + maxSlice = s; + maxWork = s.size(); + } + } + if (maxTask == null) + return null; + if (maxTask.tryStealWork(maxSlice)) + return forThread.initWindow(maxSlice); + } + } + + void partitionTasks() { + ArrayList topPaths = computeTopPaths(); + Iterator topPathItr = topPaths.iterator(); + int nextTop = 0; + long weightPerThread = totalWeight / threads; + for (int i = beginIndex; i < endIndex;) { + DeltaTask task = new DeltaTask(this); + long w = 0; + + // Assign the thread one top path. + if (topPathItr.hasNext()) { + WeightedPath p = topPathItr.next(); + w += p.weight; + task.add(p.slice); + } + + // Assign the task thread ~average weight. + int s = i; + for (; w < weightPerThread && i < endIndex;) { + if (nextTop < topPaths.size() + && i == topPaths.get(nextTop).slice.beginIndex) { + if (s < i) + task.add(new Slice(s, i)); + s = i = topPaths.get(nextTop++).slice.endIndex; + } else + w += list[i++].getWeight(); + } + + // Round up the slice to the end of a path. 
+ if (s < i) { + int h = list[i - 1].getPathHash(); + while (i < endIndex) { + if (h == list[i].getPathHash()) + i++; + else + break; + } + task.add(new Slice(s, i)); + } + if (!task.slices.isEmpty()) + tasks.add(task); + } + while (topPathItr.hasNext()) { + WeightedPath p = topPathItr.next(); + DeltaTask task = new DeltaTask(this); + task.add(p.slice); + tasks.add(task); + } + + topPaths = null; + } + + private ArrayList computeTopPaths() { + ArrayList topPaths = new ArrayList( + threads); + int cp = beginIndex; + int ch = list[cp].getPathHash(); + long cw = list[cp].getWeight(); + totalWeight = list[cp].getWeight(); + + for (int i = cp + 1; i < endIndex; i++) { + ObjectToPack o = list[i]; + if (ch != o.getPathHash()) { + if (MIN_TOP_PATH < cw) { + if (topPaths.size() < threads) { + Slice s = new Slice(cp, i); + topPaths.add(new WeightedPath(cw, s)); + if (topPaths.size() == threads) + Collections.sort(topPaths); + } else if (topPaths.get(0).weight < cw) { + Slice s = new Slice(cp, i); + WeightedPath p = new WeightedPath(cw, s); + topPaths.set(0, p); + if (p.compareTo(topPaths.get(1)) > 0) + Collections.sort(topPaths); + } + } + cp = i; + ch = o.getPathHash(); + cw = 0; + } + if (o.isEdge() || o.doNotAttemptDelta()) + continue; + cw += o.getWeight(); + totalWeight += o.getWeight(); + } + + // Sort by starting index to identify gaps later. + Collections.sort(topPaths, new Comparator() { + public int compare(WeightedPath a, WeightedPath b) { + return a.slice.beginIndex - b.slice.beginIndex; + } + }); + return topPaths; + } + } + + static final class WeightedPath implements Comparable { + final long weight; + final Slice slice; + + WeightedPath(long weight, Slice s) { + this.weight = weight; + this.slice = s; + } + + public int compareTo(WeightedPath o) { + int cmp = Long.signum(weight - o.weight); + if (cmp != 0) + return cmp; + return slice.beginIndex - o.slice.beginIndex; + } + } + + static final class Slice { + final int beginIndex; + final int endIndex; + + Slice(int b, int e) { + beginIndex = b; + endIndex = e; + } + + final int size() { + return endIndex - beginIndex; + } + } + + private final Block block; + private final LinkedList slices; + + private ObjectReader or; + private DeltaWindow dw; + + DeltaTask(Block b) { + this.block = b; + this.slices = new LinkedList(); + } + + void add(Slice s) { + if (!slices.isEmpty()) { + Slice last = slices.getLast(); + if (last.endIndex == s.beginIndex) { + slices.removeLast(); + slices.add(new Slice(last.beginIndex, s.endIndex)); + return; + } + } + slices.add(s); + } + + public Object call() throws Exception { + or = block.templateReader.newReader(); + try { + DeltaWindow w; + for (;;) { + synchronized (this) { + if (slices.isEmpty()) + break; + w = initWindow(slices.removeFirst()); + } + runWindow(w); + } + while ((w = block.stealWork(this)) != null) + runWindow(w); + } finally { + block.pm.endWorker(); + or.release(); + or = null; + } + return null; + } + + DeltaWindow initWindow(Slice s) { + DeltaWindow w = new DeltaWindow(block.config, block.dc, + or, block.pm, + block.list, s.beginIndex, s.endIndex); + synchronized (this) { + dw = w; + } + return w; + } + + private void runWindow(DeltaWindow w) throws IOException { + try { + w.search(); + } finally { + synchronized (this) { + dw = null; + } + } + } + + synchronized Slice remaining() { + if (!slices.isEmpty()) + return slices.getLast(); + DeltaWindow d = dw; + return d != null ? 
d.remaining() : null; + } + + synchronized boolean tryStealWork(Slice s) { + if (!slices.isEmpty() && slices.getLast().beginIndex == s.beginIndex) { + slices.removeLast(); + return true; + } + DeltaWindow d = dw; + return d != null ? d.tryStealWork(s) : false; + } +} diff --git a/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java new file mode 100644 index 000000000..cc7fac800 --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java @@ -0,0 +1,506 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.internal.storage.pack; + +import java.io.EOFException; +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Deflater; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.util.TemporaryBuffer; + +final class DeltaWindow { + private static final boolean NEXT_RES = false; + private static final boolean NEXT_SRC = true; + + private final PackConfig config; + private final DeltaCache deltaCache; + private final ObjectReader reader; + private final ProgressMonitor monitor; + + /** Maximum number of bytes to admit to the window at once. */ + private final long maxMemory; + + /** Maximum depth we should create for any delta chain. */ + private final int maxDepth; + + private final ObjectToPack[] toSearch; + private int cur; + private int end; + + /** Amount of memory we have loaded right now. */ + private long loaded; + + // The object we are currently considering needs a lot of state: + + /** Window entry of the object we are currently considering. */ + private DeltaWindowEntry res; + + /** If we have chosen a base, the window entry it was created from. */ + private DeltaWindowEntry bestBase; + private int deltaLen; + private Object deltaBuf; + + /** Used to compress cached deltas. */ + private Deflater deflater; + + DeltaWindow(PackConfig pc, DeltaCache dc, ObjectReader or, + ProgressMonitor pm, + ObjectToPack[] in, int beginIndex, int endIndex) { + config = pc; + deltaCache = dc; + reader = or; + monitor = pm; + toSearch = in; + cur = beginIndex; + end = endIndex; + + maxMemory = Math.max(0, config.getDeltaSearchMemoryLimit()); + maxDepth = config.getMaxDeltaDepth(); + res = DeltaWindowEntry.createWindow(config.getDeltaSearchWindowSize()); + } + + synchronized DeltaTask.Slice remaining() { + int e = end; + int halfRemaining = (e - cur) >>> 1; + if (0 == halfRemaining) + return null; + + int split = e - halfRemaining; + int h = toSearch[split].getPathHash(); + + // Attempt to split on the next path after the 50% split point. + for (int n = split + 1; n < e; n++) { + if (h != toSearch[n].getPathHash()) + return new DeltaTask.Slice(n, e); + } + + if (h != toSearch[cur].getPathHash()) { + // Try to split on the path before the 50% split point. + // Do not split the path currently being processed. + for (int p = split - 1; cur < p; p--) { + if (h != toSearch[p].getPathHash()) + return new DeltaTask.Slice(p + 1, e); + } + } + return null; + } + + synchronized boolean tryStealWork(DeltaTask.Slice s) { + if (s.beginIndex <= cur || end <= s.beginIndex) + return false; + end = s.beginIndex; + return true; + } + + void search() throws IOException { + try { + for (;;) { + ObjectToPack next; + synchronized (this) { + if (end <= cur) + break; + next = toSearch[cur++]; + } + if (maxMemory != 0) { + clear(res); + final long need = estimateSize(next); + DeltaWindowEntry n = res.next; + for (; maxMemory < loaded + need && n != res; n = n.next) + clear(n); + } + res.set(next); + + if (res.object.isEdge() || res.object.doNotAttemptDelta()) { + // We don't actually want to make a delta for + // them, just need to push them into the window + // so they can be read by other objects. + // + keepInWindow(); + } else { + // Search for a delta for the current window slot. 
+ // + monitor.update(1); + searchInWindow(); + } + } + } finally { + if (deflater != null) + deflater.end(); + } + } + + private static long estimateSize(ObjectToPack ent) { + return DeltaIndex.estimateIndexSize(ent.getWeight()); + } + + private static long estimateIndexSize(DeltaWindowEntry ent) { + if (ent.buffer == null) + return estimateSize(ent.object); + + int len = ent.buffer.length; + return DeltaIndex.estimateIndexSize(len) - len; + } + + private void clear(DeltaWindowEntry ent) { + if (ent.index != null) + loaded -= ent.index.getIndexSize(); + else if (ent.buffer != null) + loaded -= ent.buffer.length; + ent.set(null); + } + + private void searchInWindow() throws IOException { + // Loop through the window backwards, considering every entry. + // This lets us look at the bigger objects that came before. + for (DeltaWindowEntry src = res.prev; src != res; src = src.prev) { + if (src.empty()) + break; + if (delta(src) /* == NEXT_SRC */) + continue; + bestBase = null; + deltaBuf = null; + return; + } + + // We couldn't find a suitable delta for this object, but it may + // still be able to act as a base for another one. + if (bestBase == null) { + keepInWindow(); + return; + } + + // Select this best matching delta as the base for the object. + // + ObjectToPack srcObj = bestBase.object; + ObjectToPack resObj = res.object; + if (srcObj.isEdge()) { + // The source (the delta base) is an edge object outside of the + // pack. Its part of the common base set that the peer already + // has on hand, so we don't want to send it. We have to store + // an ObjectId and *NOT* an ObjectToPack for the base to ensure + // the base isn't included in the outgoing pack file. + resObj.setDeltaBase(srcObj.copy()); + } else { + // The base is part of the pack we are sending, so it should be + // a direct pointer to the base. + resObj.setDeltaBase(srcObj); + } + + int depth = srcObj.getDeltaDepth() + 1; + resObj.setDeltaDepth(depth); + resObj.clearReuseAsIs(); + cacheDelta(srcObj, resObj); + + if (depth < maxDepth) { + // Reorder the window so that the best base will be tested + // first for the next object, and the current object will + // be the second candidate to consider before any others. + res.makeNext(bestBase); + res = bestBase.next; + } + + bestBase = null; + deltaBuf = null; + } + + private boolean delta(final DeltaWindowEntry src) + throws IOException { + // Objects must use only the same type as their delta base. + if (src.type() != res.type()) { + keepInWindow(); + return NEXT_RES; + } + + // If the sizes are radically different, this is a bad pairing. + if (res.size() < src.size() >>> 4) + return NEXT_SRC; + + int msz = deltaSizeLimit(src); + if (msz <= 8) // Nearly impossible to fit useful delta. + return NEXT_SRC; + + // If we have to insert a lot to make this work, find another. + if (res.size() - src.size() > msz) + return NEXT_SRC; + + DeltaIndex srcIndex; + try { + srcIndex = index(src); + } catch (LargeObjectException tooBig) { + // If the source is too big to work on, skip it. + return NEXT_SRC; + } catch (IOException notAvailable) { + if (src.object.isEdge()) // Missing edges are OK. + return NEXT_SRC; + throw notAvailable; + } + + byte[] resBuf; + try { + resBuf = buffer(res); + } catch (LargeObjectException tooBig) { + // If its too big, move on to another item. + return NEXT_RES; + } + + try { + OutputStream delta = msz <= (8 << 10) + ? 
new ArrayStream(msz) + : new TemporaryBuffer.Heap(msz); + if (srcIndex.encode(delta, resBuf, msz)) + selectDeltaBase(src, delta); + } catch (IOException deltaTooBig) { + // Unlikely, encoder should see limit and return false. + } + return NEXT_SRC; + } + + private void selectDeltaBase(DeltaWindowEntry src, OutputStream delta) { + bestBase = src; + + if (delta instanceof ArrayStream) { + ArrayStream a = (ArrayStream) delta; + deltaBuf = a.buf; + deltaLen = a.cnt; + } else { + TemporaryBuffer.Heap b = (TemporaryBuffer.Heap) delta; + deltaBuf = b; + deltaLen = (int) b.length(); + } + } + + private int deltaSizeLimit(DeltaWindowEntry src) { + if (bestBase == null) { + // Any delta should be no more than 50% of the original size + // (for text files deflate of whole form should shrink 50%). + int n = res.size() >>> 1; + + // Evenly distribute delta size limits over allowed depth. + // If src is non-delta (depth = 0), delta <= 50% of original. + // If src is almost at limit (9/10), delta <= 10% of original. + return n * (maxDepth - src.depth()) / maxDepth; + } + + // With a delta base chosen any new delta must be "better". + // Retain the distribution described above. + int d = bestBase.depth(); + int n = deltaLen; + + // If src is whole (depth=0) and base is near limit (depth=9/10) + // any delta using src can be 10x larger and still be better. + // + // If src is near limit (depth=9/10) and base is whole (depth=0) + // a new delta dependent on src must be 1/10th the size. + return n * (maxDepth - src.depth()) / (maxDepth - d); + } + + private void cacheDelta(ObjectToPack srcObj, ObjectToPack resObj) { + if (deltaCache.canCache(deltaLen, srcObj, resObj)) { + try { + byte[] zbuf = new byte[deflateBound(deltaLen)]; + ZipStream zs = new ZipStream(deflater(), zbuf); + if (deltaBuf instanceof byte[]) + zs.write((byte[]) deltaBuf, 0, deltaLen); + else + ((TemporaryBuffer.Heap) deltaBuf).writeTo(zs, null); + deltaBuf = null; + int len = zs.finish(); + + resObj.setCachedDelta(deltaCache.cache(zbuf, len, deltaLen)); + resObj.setCachedSize(deltaLen); + } catch (IOException err) { + deltaCache.credit(deltaLen); + } catch (OutOfMemoryError err) { + deltaCache.credit(deltaLen); + } + } + } + + private static int deflateBound(int insz) { + return insz + ((insz + 7) >> 3) + ((insz + 63) >> 6) + 11; + } + + private void keepInWindow() { + res = res.next; + } + + private DeltaIndex index(DeltaWindowEntry ent) + throws MissingObjectException, IncorrectObjectTypeException, + IOException, LargeObjectException { + DeltaIndex idx = ent.index; + if (idx == null) { + checkLoadable(ent, estimateIndexSize(ent)); + + try { + idx = new DeltaIndex(buffer(ent)); + } catch (OutOfMemoryError noMemory) { + LargeObjectException.OutOfMemory e; + e = new LargeObjectException.OutOfMemory(noMemory); + e.setObjectId(ent.object); + throw e; + } + if (maxMemory != 0) + loaded += idx.getIndexSize() - idx.getSourceSize(); + ent.index = idx; + } + return idx; + } + + private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException, + IncorrectObjectTypeException, IOException, LargeObjectException { + byte[] buf = ent.buffer; + if (buf == null) { + checkLoadable(ent, ent.size()); + + buf = PackWriter.buffer(config, reader, ent.object); + if (maxMemory != 0) + loaded += buf.length; + ent.buffer = buf; + } + return buf; + } + + private void checkLoadable(DeltaWindowEntry ent, long need) { + if (maxMemory == 0) + return; + + DeltaWindowEntry n = res.next; + for (; maxMemory < loaded + need; n = n.next) { + clear(n); + if (n == ent) 
+ throw new LargeObjectException.ExceedsLimit( + maxMemory, loaded + need); + } + } + + private Deflater deflater() { + if (deflater == null) + deflater = new Deflater(config.getCompressionLevel()); + else + deflater.reset(); + return deflater; + } + + static final class ZipStream extends OutputStream { + private final Deflater deflater; + + private final byte[] zbuf; + + private int outPtr; + + ZipStream(Deflater deflater, byte[] zbuf) { + this.deflater = deflater; + this.zbuf = zbuf; + } + + int finish() throws IOException { + deflater.finish(); + for (;;) { + if (outPtr == zbuf.length) + throw new EOFException(); + + int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr); + if (n == 0) { + if (deflater.finished()) + return outPtr; + throw new IOException(); + } + outPtr += n; + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + deflater.setInput(b, off, len); + for (;;) { + if (outPtr == zbuf.length) + throw new EOFException(); + + int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr); + if (n == 0) { + if (deflater.needsInput()) + break; + throw new IOException(); + } + outPtr += n; + } + } + + @Override + public void write(int b) throws IOException { + throw new UnsupportedOperationException(); + } + } + + static final class ArrayStream extends OutputStream { + final byte[] buf; + int cnt; + + ArrayStream(int max) { + buf = new byte[max]; + } + + @Override + public void write(int b) throws IOException { + if (cnt == buf.length) + throw new IOException(); + buf[cnt++] = (byte) b; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (len > buf.length - cnt) + throw new IOException(); + System.arraycopy(b, off, buf, cnt, len); + cnt += len; + } + } +} diff --git a/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java new file mode 100644 index 000000000..a3ef27c21 --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-21e4aa2b9eaf392825e52ada6034cc3044c69c67/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -0,0 +1,2452 @@ +/* + * Copyright (C) 2008-2010, Google Inc. + * Copyright (C) 2008, Marek Zawirski + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.pack; + +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_WHOLE; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TAG; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; + +import java.io.IOException; +import java.io.OutputStream; +import java.lang.ref.WeakReference; +import java.security.MessageDigest; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.zip.CRC32; +import java.util.zip.CheckedOutputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; + +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder; +import org.eclipse.jgit.internal.storage.file.PackBitmapIndexWriterV1; +import org.eclipse.jgit.internal.storage.file.PackIndexWriter; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.AsyncObjectSizeQueue; +import org.eclipse.jgit.lib.BatchingProgressMonitor; +import org.eclipse.jgit.lib.BitmapIndex; +import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; +import org.eclipse.jgit.lib.BitmapObject; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; +import 
org.eclipse.jgit.revwalk.AsyncRevObjectQueue; +import org.eclipse.jgit.revwalk.DepthWalk; +import org.eclipse.jgit.revwalk.ObjectWalk; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevFlag; +import org.eclipse.jgit.revwalk.RevObject; +import org.eclipse.jgit.revwalk.RevSort; +import org.eclipse.jgit.revwalk.RevTag; +import org.eclipse.jgit.revwalk.RevTree; +import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.util.BlockList; +import org.eclipse.jgit.util.TemporaryBuffer; + +/** + * <p> + * PackWriter class is responsible for generating pack files from specified set + * of objects from repository. This implementation produce pack files in format + * version 2. + * </p> + * <p> + * Source of objects may be specified in two ways: + * <ul> + * <li>(usually) by providing sets of interesting and uninteresting objects in + * repository - all interesting objects and their ancestors except uninteresting + * objects and their ancestors will be included in pack, or</li> + * <li>by providing iterator of {@link RevObject} specifying exact list and + * order of objects in pack</li> + * </ul> + * <p>
+ * Typical usage consists of creating instance intended for some pack, + * configuring options, preparing the list of objects by calling + * {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally + * producing the stream with {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. + * </p> + * <p> + * Class provide set of configurable options and {@link ProgressMonitor} + * support, as operations may take a long time for big repositories. Deltas + * searching algorithm is NOT IMPLEMENTED yet - this implementation + * relies only on deltas and objects reuse. + * </p> + * <p> + * This class is not thread safe, it is intended to be used in one thread, with + * one instance per created pack. Subsequent calls to writePack result in + * undefined behavior. + * </p>
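+ * <p>
+ * A minimal sketch of that flow (for illustration only; {@code repo},
+ * {@code monitor}, {@code want}, {@code have} and {@code out} are assumed to
+ * be supplied by the caller as a {@code Repository}, a {@code ProgressMonitor},
+ * two {@code Set<ObjectId>} instances and an {@code OutputStream}):
+ * <pre>
+ * PackWriter pw = new PackWriter(repo);
+ * // enumerate objects reachable from want, excluding those reachable from have
+ * pw.preparePack(monitor, want, have);
+ * // write the version 2 pack stream to out
+ * pw.writePack(monitor, monitor, out);
+ * </pre>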

+ */ +public class PackWriter { + private static final int PACK_VERSION_GENERATED = 2; + + /** A collection of object ids. */ + public interface ObjectIdSet { + /** + * Returns true if the objectId is contained within the collection. + * + * @param objectId + * the objectId to find + * @return whether the collection contains the objectId. + */ + boolean contains(AnyObjectId objectId); + } + + private static final Map, Boolean> instances = + new ConcurrentHashMap, Boolean>(); + + private static final Iterable instancesIterable = new Iterable() { + public Iterator iterator() { + return new Iterator() { + private final Iterator> it = + instances.keySet().iterator(); + private PackWriter next; + + public boolean hasNext() { + if (next != null) + return true; + while (it.hasNext()) { + WeakReference ref = it.next(); + next = ref.get(); + if (next != null) + return true; + it.remove(); + } + return false; + } + + public PackWriter next() { + if (hasNext()) { + PackWriter result = next; + next = null; + return result; + } + throw new NoSuchElementException(); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + + /** @return all allocated, non-released PackWriters instances. */ + public static Iterable getInstances() { + return instancesIterable; + } + + @SuppressWarnings("unchecked") + private final BlockList objectsLists[] = new BlockList[OBJ_TAG + 1]; + { + objectsLists[OBJ_COMMIT] = new BlockList(); + objectsLists[OBJ_TREE] = new BlockList(); + objectsLists[OBJ_BLOB] = new BlockList(); + objectsLists[OBJ_TAG] = new BlockList(); + } + + private final ObjectIdOwnerMap objectsMap = new ObjectIdOwnerMap(); + + // edge objects for thin packs + private List edgeObjects = new BlockList(); + + // Objects the client is known to have already. + private BitmapBuilder haveObjects; + + private List cachedPacks = new ArrayList(2); + + private Set tagTargets = Collections.emptySet(); + + private ObjectIdSet[] excludeInPacks; + + private ObjectIdSet excludeInPackLast; + + private Deflater myDeflater; + + private final ObjectReader reader; + + /** {@link #reader} recast to the reuse interface, if it supports it. */ + private final ObjectReuseAsIs reuseSupport; + + private final PackConfig config; + + private final Statistics stats; + + private final MutableState state; + + private final WeakReference selfRef; + + private Statistics.ObjectType typeStats; + + private List sortedByName; + + private byte packcsum[]; + + private boolean deltaBaseAsOffset; + + private boolean reuseDeltas; + + private boolean reuseDeltaCommits; + + private boolean reuseValidate; + + private boolean thin; + + private boolean useCachedPacks; + + private boolean useBitmaps; + + private boolean ignoreMissingUninteresting = true; + + private boolean pruneCurrentObjectList; + + private boolean shallowPack; + + private boolean canBuildBitmaps; + + private boolean indexDisabled; + + private int depth; + + private Collection unshallowObjects; + + private PackBitmapIndexBuilder writeBitmaps; + + private CRC32 crc32; + + /** + * Create writer for specified repository. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param repo + * repository where objects are stored. + */ + public PackWriter(final Repository repo) { + this(repo, repo.newObjectReader()); + } + + /** + * Create a writer to load objects from the specified reader. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final ObjectReader reader) { + this(new PackConfig(), reader); + } + + /** + * Create writer for specified repository. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param repo + * repository where objects are stored. + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final Repository repo, final ObjectReader reader) { + this(new PackConfig(repo), reader); + } + + /** + * Create writer with a specified configuration. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param config + * configuration for the pack writer. + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final PackConfig config, final ObjectReader reader) { + this.config = config; + this.reader = reader; + if (reader instanceof ObjectReuseAsIs) + reuseSupport = ((ObjectReuseAsIs) reader); + else + reuseSupport = null; + + deltaBaseAsOffset = config.isDeltaBaseAsOffset(); + reuseDeltas = config.isReuseDeltas(); + reuseValidate = true; // be paranoid by default + stats = new Statistics(); + state = new MutableState(); + selfRef = new WeakReference(this); + instances.put(selfRef, Boolean.TRUE); + } + + /** + * Check whether writer can store delta base as an offset (new style + * reducing pack size) or should store it as an object id (legacy style, + * compatible with old readers). + * + * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET} + * + * @return true if delta base is stored as an offset; false if it is stored + * as an object id. + */ + public boolean isDeltaBaseAsOffset() { + return deltaBaseAsOffset; + } + + /** + * Set writer delta base format. Delta base can be written as an offset in a + * pack file (new approach reducing file size) or as an object id (legacy + * approach, compatible with old readers). + * + * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET} + * + * @param deltaBaseAsOffset + * boolean indicating whether delta base can be stored as an + * offset. + */ + public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) { + this.deltaBaseAsOffset = deltaBaseAsOffset; + } + + /** + * Check if the writer will reuse commits that are already stored as deltas. + * + * @return true if the writer would reuse commits stored as deltas, assuming + * delta reuse is already enabled. + */ + public boolean isReuseDeltaCommits() { + return reuseDeltaCommits; + } + + /** + * Set the writer to reuse existing delta versions of commits. + * + * @param reuse + * if true, the writer will reuse any commits stored as deltas. + * By default the writer does not reuse delta commits. + */ + public void setReuseDeltaCommits(boolean reuse) { + reuseDeltaCommits = reuse; + } + + /** + * Check if the writer validates objects before copying them. + * + * @return true if validation is enabled; false if the reader will handle + * object validation as a side-effect of it consuming the output. + */ + public boolean isReuseValidatingObjects() { + return reuseValidate; + } + + /** + * Enable (or disable) object validation during packing. + * + * @param validate + * if true the pack writer will validate an object before it is + * put into the output. This additional validation work may be + * necessary to avoid propagating corruption from one local pack + * file to another local pack file. + */ + public void setReuseValidatingObjects(boolean validate) { + reuseValidate = validate; + } + + /** @return true if this writer is producing a thin pack. */ + public boolean isThin() { + return thin; + } + + /** + * @param packthin + * a boolean indicating whether writer may pack objects with + * delta base object not within set of objects to pack, but + * belonging to party repository (uninteresting/boundary) as + * determined by set; this kind of pack is used only for + * transport; true - to produce thin pack, false - otherwise. 
+ */ + public void setThin(final boolean packthin) { + thin = packthin; + } + + /** @return true to reuse cached packs. If true index creation isn't available. */ + public boolean isUseCachedPacks() { + return useCachedPacks; + } + + /** + * @param useCached + * if set to true and a cached pack is present, it will be + * appended onto the end of a thin-pack, reducing the amount of + * working set space and CPU used by PackWriter. Enabling this + * feature prevents PackWriter from creating an index for the + * newly created pack, so its only suitable for writing to a + * network client, where the client will make the index. + */ + public void setUseCachedPacks(boolean useCached) { + useCachedPacks = useCached; + } + + /** @return true to use bitmaps for ObjectWalks, if available. */ + public boolean isUseBitmaps() { + return useBitmaps; + } + + /** + * @param useBitmaps + * if set to true, bitmaps will be used when preparing a pack. + */ + public void setUseBitmaps(boolean useBitmaps) { + this.useBitmaps = useBitmaps; + } + + /** @return true if the index file cannot be created by this PackWriter. */ + public boolean isIndexDisabled() { + return indexDisabled || !cachedPacks.isEmpty(); + } + + /** + * @param noIndex + * true to disable creation of the index file. + */ + public void setIndexDisabled(boolean noIndex) { + this.indexDisabled = noIndex; + } + + /** + * @return true to ignore objects that are uninteresting and also not found + * on local disk; false to throw a {@link MissingObjectException} + * out of {@link #preparePack(ProgressMonitor, Collection, Collection)} if an + * uninteresting object is not in the source repository. By default, + * true, permitting gracefully ignoring of uninteresting objects. + */ + public boolean isIgnoreMissingUninteresting() { + return ignoreMissingUninteresting; + } + + /** + * @param ignore + * true if writer should ignore non existing uninteresting + * objects during construction set of objects to pack; false + * otherwise - non existing uninteresting objects may cause + * {@link MissingObjectException} + */ + public void setIgnoreMissingUninteresting(final boolean ignore) { + ignoreMissingUninteresting = ignore; + } + + /** + * Set the tag targets that should be hoisted earlier during packing. + *
<p>
+ * Callers may put objects into this set before invoking any of the + * preparePack methods to influence where an annotated tag's target is + * stored within the resulting pack. Typically these will be clustered + * together, and hoisted earlier in the file even if they are ancient + * revisions, allowing readers to find tag targets with better locality. + * + * @param objects + * objects that annotated tags point at. + */ + public void setTagTargets(Set objects) { + tagTargets = objects; + } + + /** + * Configure this pack for a shallow clone. + * + * @param depth + * maximum depth to traverse the commit graph + * @param unshallow + * objects which used to be shallow on the client, but are being + * extended as part of this fetch + */ + public void setShallowPack(int depth, + Collection unshallow) { + this.shallowPack = true; + this.depth = depth; + this.unshallowObjects = unshallow; + } + + /** + * Returns objects number in a pack file that was created by this writer. + * + * @return number of objects in pack. + * @throws IOException + * a cached pack cannot supply its object count. + */ + public long getObjectCount() throws IOException { + if (stats.totalObjects == 0) { + long objCnt = 0; + + objCnt += objectsLists[OBJ_COMMIT].size(); + objCnt += objectsLists[OBJ_TREE].size(); + objCnt += objectsLists[OBJ_BLOB].size(); + objCnt += objectsLists[OBJ_TAG].size(); + + for (CachedPack pack : cachedPacks) + objCnt += pack.getObjectCount(); + return objCnt; + } + return stats.totalObjects; + } + + /** + * Returns the object ids in the pack file that was created by this writer. + * + * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. + * + * @return number of objects in pack. + * @throws IOException + * a cached pack cannot supply its object ids. + */ + public ObjectIdOwnerMap getObjectSet() + throws IOException { + if (!cachedPacks.isEmpty()) + throw new IOException( + JGitText.get().cachedPacksPreventsListingObjects); + + ObjectIdOwnerMap objs = new ObjectIdOwnerMap< + ObjectIdOwnerMap.Entry>(); + for (BlockList objList : objectsLists) { + if (objList != null) { + for (ObjectToPack otp : objList) + objs.add(new ObjectIdOwnerMap.Entry(otp) { + // A new entry that copies the ObjectId + }); + } + } + return objs; + } + + /** + * Add a pack index whose contents should be excluded from the result. + * + * @param idx + * objects in this index will not be in the output pack. + */ + public void excludeObjects(ObjectIdSet idx) { + if (excludeInPacks == null) { + excludeInPacks = new ObjectIdSet[] { idx }; + excludeInPackLast = idx; + } else { + int cnt = excludeInPacks.length; + ObjectIdSet[] newList = new ObjectIdSet[cnt + 1]; + System.arraycopy(excludeInPacks, 0, newList, 0, cnt); + newList[cnt] = idx; + excludeInPacks = newList; + } + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Iterator exactly determines which objects are included in a pack + * and order they appear in pack (except that objects order by type is not + * needed at input). This order should conform general rules of ordering + * objects in git - by recency and path (type and delta-base first is + * internally secured) and responsibility for guaranteeing this order is on + * a caller side. Iterator must return each id of object to write exactly + * once. + *
</p>
+ * + * @param objectsSource + * iterator of object to store in a pack; order of objects within + * each type is important, ordering by type is not needed; + * allowed types for objects are {@link Constants#OBJ_COMMIT}, + * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and + * {@link Constants#OBJ_TAG}; objects returned by iterator may be + * later reused by caller as object id and type are internally + * copied in each iteration. + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(final Iterator objectsSource) + throws IOException { + while (objectsSource.hasNext()) { + addObject(objectsSource.next()); + } + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param want + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param have + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + * @deprecated to be removed in 2.0; use the Set version of this method. + */ + @Deprecated + public void preparePack(ProgressMonitor countingMonitor, + final Collection want, + final Collection have) throws IOException { + preparePack(countingMonitor, ensureSet(want), ensureSet(have)); + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param walk + * ObjectWalk to perform enumeration. + * @param interestingObjects + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param uninterestingObjects + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + * @deprecated to be removed in 2.0; use the Set version of this method. + */ + @Deprecated + public void preparePack(ProgressMonitor countingMonitor, + ObjectWalk walk, + final Collection interestingObjects, + final Collection uninterestingObjects) + throws IOException { + preparePack(countingMonitor, walk, + ensureSet(interestingObjects), + ensureSet(uninterestingObjects)); + } + + @SuppressWarnings("unchecked") + private static Set ensureSet(Collection objs) { + Set set; + if (objs instanceof Set) + set = (Set) objs; + else if (objs == null) + set = Collections.emptySet(); + else + set = new HashSet(objs); + return set; + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param want + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param have + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(ProgressMonitor countingMonitor, + Set want, + Set have) throws IOException { + ObjectWalk ow; + if (shallowPack) + ow = new DepthWalk.ObjectWalk(reader, depth); + else + ow = new ObjectWalk(reader); + preparePack(countingMonitor, ow, want, have); + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param walk + * ObjectWalk to perform enumeration. + * @param interestingObjects + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param uninterestingObjects + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(ProgressMonitor countingMonitor, + ObjectWalk walk, + final Set interestingObjects, + final Set uninterestingObjects) + throws IOException { + if (countingMonitor == null) + countingMonitor = NullProgressMonitor.INSTANCE; + if (shallowPack && !(walk instanceof DepthWalk.ObjectWalk)) + walk = new DepthWalk.ObjectWalk(reader, depth); + findObjectsToPack(countingMonitor, walk, interestingObjects, + uninterestingObjects); + } + + /** + * Determine if the pack file will contain the requested object. + * + * @param id + * the object to test the existence of. + * @return true if the object will appear in the output pack file. + * @throws IOException + * a cached pack cannot be examined. + */ + public boolean willInclude(final AnyObjectId id) throws IOException { + ObjectToPack obj = objectsMap.get(id); + return obj != null && !obj.isEdge(); + } + + /** + * Lookup the ObjectToPack object for a given ObjectId. + * + * @param id + * the object to find in the pack. + * @return the object we are packing, or null. + */ + public ObjectToPack get(AnyObjectId id) { + ObjectToPack obj = objectsMap.get(id); + return obj != null && !obj.isEdge() ? obj : null; + } + + /** + * Computes SHA-1 of lexicographically sorted objects ids written in this + * pack, as used to name a pack file in repository. + * + * @return ObjectId representing SHA-1 name of a pack that was created. + */ + public ObjectId computeName() { + final byte[] buf = new byte[OBJECT_ID_LENGTH]; + final MessageDigest md = Constants.newMessageDigest(); + for (ObjectToPack otp : sortByName()) { + otp.copyRawTo(buf, 0); + md.update(buf, 0, OBJECT_ID_LENGTH); + } + return ObjectId.fromRaw(md.digest()); + } + + /** + * Returns the index format version that will be written. + *
<p>
+ * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. + * + * @return the index format version. + */ + public int getIndexVersion() { + int indexVersion = config.getIndexVersion(); + if (indexVersion <= 0) { + for (BlockList objs : objectsLists) + indexVersion = Math.max(indexVersion, + PackIndexWriter.oldestPossibleFormat(objs)); + } + return indexVersion; + } + + /** + * Create an index file to match the pack file just written. + *
<p>
+ * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. Writing a corresponding index is + * an optional feature that not all pack users may require. + * + * @param indexStream + * output for the index data. Caller is responsible for closing + * this stream. + * @throws IOException + * the index data could not be written to the supplied stream. + */ + public void writeIndex(final OutputStream indexStream) throws IOException { + if (isIndexDisabled()) + throw new IOException(JGitText.get().cachedPacksPreventsIndexCreation); + + long writeStart = System.currentTimeMillis(); + final PackIndexWriter iw = PackIndexWriter.createVersion( + indexStream, getIndexVersion()); + iw.write(sortByName(), packcsum); + stats.timeWriting += System.currentTimeMillis() - writeStart; + } + + /** + * Create a bitmap index file to match the pack file just written. + *
<p>
+ * This method can only be invoked after + * {@link #prepareBitmapIndex(ProgressMonitor)} has been invoked and + * completed successfully. Writing a corresponding bitmap index is an + * optional feature that not all pack users may require. + * + * @param bitmapIndexStream + * output for the bitmap index data. Caller is responsible for + * closing this stream. + * @throws IOException + * the index data could not be written to the supplied stream. + */ + public void writeBitmapIndex(final OutputStream bitmapIndexStream) + throws IOException { + if (writeBitmaps == null) + throw new IOException(JGitText.get().bitmapsMustBePrepared); + + long writeStart = System.currentTimeMillis(); + final PackBitmapIndexWriterV1 iw = new PackBitmapIndexWriterV1(bitmapIndexStream); + iw.write(writeBitmaps, packcsum); + stats.timeWriting += System.currentTimeMillis() - writeStart; + } + + private List sortByName() { + if (sortedByName == null) { + int cnt = 0; + cnt += objectsLists[OBJ_COMMIT].size(); + cnt += objectsLists[OBJ_TREE].size(); + cnt += objectsLists[OBJ_BLOB].size(); + cnt += objectsLists[OBJ_TAG].size(); + + sortedByName = new BlockList(cnt); + sortedByName.addAll(objectsLists[OBJ_COMMIT]); + sortedByName.addAll(objectsLists[OBJ_TREE]); + sortedByName.addAll(objectsLists[OBJ_BLOB]); + sortedByName.addAll(objectsLists[OBJ_TAG]); + Collections.sort(sortedByName); + } + return sortedByName; + } + + private void beginPhase(PackingPhase phase, ProgressMonitor monitor, + long cnt) { + state.phase = phase; + String task; + switch (phase) { + case COUNTING: + task = JGitText.get().countingObjects; + break; + case GETTING_SIZES: + task = JGitText.get().searchForSizes; + break; + case FINDING_SOURCES: + task = JGitText.get().searchForReuse; + break; + case COMPRESSING: + task = JGitText.get().compressingObjects; + break; + case WRITING: + task = JGitText.get().writingObjects; + break; + case BUILDING_BITMAPS: + task = JGitText.get().buildingBitmaps; + break; + default: + throw new IllegalArgumentException( + MessageFormat.format(JGitText.get().illegalPackingPhase, phase)); + } + monitor.beginTask(task, (int) cnt); + } + + private void endPhase(ProgressMonitor monitor) { + monitor.endTask(); + } + + /** + * Write the prepared pack to the supplied stream. + *
<p>
+ * At first, this method collects and sorts objects to pack, then deltas + * search is performed if set up accordingly, finally pack stream is + * written. + *
</p>
+ *
<p>
+ * All reused objects data checksum (Adler32/CRC32) is computed and + * validated against existing checksum. + *
</p>
+ * + * @param compressMonitor + * progress monitor to report object compression work. + * @param writeMonitor + * progress monitor to report the number of objects written. + * @param packStream + * output stream of pack data. The stream should be buffered by + * the caller. The caller is responsible for closing the stream. + * @throws IOException + * an error occurred reading a local object's data to include in + * the pack, or writing compressed object data to the output + * stream. + */ + public void writePack(ProgressMonitor compressMonitor, + ProgressMonitor writeMonitor, OutputStream packStream) + throws IOException { + if (compressMonitor == null) + compressMonitor = NullProgressMonitor.INSTANCE; + if (writeMonitor == null) + writeMonitor = NullProgressMonitor.INSTANCE; + + excludeInPacks = null; + excludeInPackLast = null; + + boolean needSearchForReuse = reuseSupport != null && ( + reuseDeltas + || config.isReuseObjects() + || !cachedPacks.isEmpty()); + + if (compressMonitor instanceof BatchingProgressMonitor) { + long delay = 1000; + if (needSearchForReuse && config.isDeltaCompress()) + delay = 500; + ((BatchingProgressMonitor) compressMonitor).setDelayStart( + delay, + TimeUnit.MILLISECONDS); + } + + if (needSearchForReuse) + searchForReuse(compressMonitor); + if (config.isDeltaCompress()) + searchForDeltas(compressMonitor); + + crc32 = new CRC32(); + final PackOutputStream out = new PackOutputStream( + writeMonitor, + isIndexDisabled() + ? packStream + : new CheckedOutputStream(packStream, crc32), + this); + + long objCnt = getObjectCount(); + stats.totalObjects = objCnt; + beginPhase(PackingPhase.WRITING, writeMonitor, objCnt); + long writeStart = System.currentTimeMillis(); + + out.writeFileHeader(PACK_VERSION_GENERATED, objCnt); + out.flush(); + + writeObjects(out); + if (!edgeObjects.isEmpty() || !cachedPacks.isEmpty()) { + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + stats.thinPackBytes += typeStat.bytes; + } + } + + for (CachedPack pack : cachedPacks) { + long deltaCnt = pack.getDeltaCount(); + stats.reusedObjects += pack.getObjectCount(); + stats.reusedDeltas += deltaCnt; + stats.totalDeltas += deltaCnt; + reuseSupport.copyPackAsIs(out, pack, reuseValidate); + } + writeChecksum(out); + out.flush(); + stats.timeWriting = System.currentTimeMillis() - writeStart; + stats.totalBytes = out.length(); + stats.reusedPacks = Collections.unmodifiableList(cachedPacks); + stats.depth = depth; + + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + typeStat.cntDeltas += typeStat.reusedDeltas; + + stats.reusedObjects += typeStat.reusedObjects; + stats.reusedDeltas += typeStat.reusedDeltas; + stats.totalDeltas += typeStat.cntDeltas; + } + + reader.release(); + endPhase(writeMonitor); + } + + /** + * @return description of what this PackWriter did in order to create the + * final pack stream. The object is only available to callers after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} + */ + public Statistics getStatistics() { + return stats; + } + + /** @return snapshot of the current state of this PackWriter. */ + public State getState() { + return state.snapshot(); + } + + /** Release all resources used by this writer. 
*/ + public void release() { + reader.release(); + if (myDeflater != null) { + myDeflater.end(); + myDeflater = null; + } + instances.remove(selfRef); + } + + private void searchForReuse(ProgressMonitor monitor) throws IOException { + long cnt = 0; + cnt += objectsLists[OBJ_COMMIT].size(); + cnt += objectsLists[OBJ_TREE].size(); + cnt += objectsLists[OBJ_BLOB].size(); + cnt += objectsLists[OBJ_TAG].size(); + + long start = System.currentTimeMillis(); + beginPhase(PackingPhase.FINDING_SOURCES, monitor, cnt); + if (cnt <= 4096) { + // For small object counts, do everything as one list. + BlockList tmp = new BlockList((int) cnt); + tmp.addAll(objectsLists[OBJ_TAG]); + tmp.addAll(objectsLists[OBJ_COMMIT]); + tmp.addAll(objectsLists[OBJ_TREE]); + tmp.addAll(objectsLists[OBJ_BLOB]); + searchForReuse(monitor, tmp); + if (pruneCurrentObjectList) { + // If the list was pruned, we need to re-prune the main lists. + pruneEdgesFromObjectList(objectsLists[OBJ_COMMIT]); + pruneEdgesFromObjectList(objectsLists[OBJ_TREE]); + pruneEdgesFromObjectList(objectsLists[OBJ_BLOB]); + pruneEdgesFromObjectList(objectsLists[OBJ_TAG]); + } + } else { + searchForReuse(monitor, objectsLists[OBJ_TAG]); + searchForReuse(monitor, objectsLists[OBJ_COMMIT]); + searchForReuse(monitor, objectsLists[OBJ_TREE]); + searchForReuse(monitor, objectsLists[OBJ_BLOB]); + } + endPhase(monitor); + stats.timeSearchingForReuse = System.currentTimeMillis() - start; + + if (config.isReuseDeltas() && config.getCutDeltaChains()) { + cutDeltaChains(objectsLists[OBJ_TREE]); + cutDeltaChains(objectsLists[OBJ_BLOB]); + } + } + + private void searchForReuse(ProgressMonitor monitor, List list) + throws IOException, MissingObjectException { + pruneCurrentObjectList = false; + reuseSupport.selectObjectRepresentation(this, monitor, list); + if (pruneCurrentObjectList) + pruneEdgesFromObjectList(list); + } + + private void cutDeltaChains(BlockList list) + throws IOException { + int max = config.getMaxDeltaDepth(); + for (int idx = list.size() - 1; idx >= 0; idx--) { + int d = 0; + ObjectToPack b = list.get(idx).getDeltaBase(); + while (b != null) { + if (d < b.getChainLength()) + break; + b.setChainLength(++d); + if (d >= max && b.isDeltaRepresentation()) { + reselectNonDelta(b); + break; + } + b = b.getDeltaBase(); + } + } + if (config.isDeltaCompress()) { + for (ObjectToPack otp : list) + otp.clearChainLength(); + } + } + + private void searchForDeltas(ProgressMonitor monitor) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + // Commits and annotated tags tend to have too many differences to + // really benefit from delta compression. Consequently just don't + // bother examining those types here. + // + ObjectToPack[] list = new ObjectToPack[ + objectsLists[OBJ_TREE].size() + + objectsLists[OBJ_BLOB].size() + + edgeObjects.size()]; + int cnt = 0; + cnt = findObjectsNeedingDelta(list, cnt, OBJ_TREE); + cnt = findObjectsNeedingDelta(list, cnt, OBJ_BLOB); + if (cnt == 0) + return; + int nonEdgeCnt = cnt; + + // Queue up any edge objects that we might delta against. We won't + // be sending these as we assume the other side has them, but we need + // them in the search phase below. + // + for (ObjectToPack eo : edgeObjects) { + eo.setWeight(0); + list[cnt++] = eo; + } + + // Compute the sizes of the objects so we can do a proper sort. + // We let the reader skip missing objects if it chooses. For + // some readers this can be a huge win. 
We detect missing objects + // by having set the weights above to 0 and allowing the delta + // search code to discover the missing object and skip over it, or + // abort with an exception if we actually had to have it. + // + final long sizingStart = System.currentTimeMillis(); + beginPhase(PackingPhase.GETTING_SIZES, monitor, cnt); + AsyncObjectSizeQueue sizeQueue = reader.getObjectSize( + Arrays. asList(list).subList(0, cnt), false); + try { + final long limit = Math.min( + config.getBigFileThreshold(), + Integer.MAX_VALUE); + for (;;) { + try { + if (!sizeQueue.next()) + break; + } catch (MissingObjectException notFound) { + monitor.update(1); + if (ignoreMissingUninteresting) { + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp != null && otp.isEdge()) { + otp.setDoNotDelta(); + continue; + } + + otp = objectsMap.get(notFound.getObjectId()); + if (otp != null && otp.isEdge()) { + otp.setDoNotDelta(); + continue; + } + } + throw notFound; + } + + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp == null) + otp = objectsMap.get(sizeQueue.getObjectId()); + + long sz = sizeQueue.getSize(); + if (DeltaIndex.BLKSZ < sz && sz < limit) + otp.setWeight((int) sz); + else + otp.setDoNotDelta(); // too small, or too big + monitor.update(1); + } + } finally { + sizeQueue.release(); + } + endPhase(monitor); + stats.timeSearchingForSizes = System.currentTimeMillis() - sizingStart; + + // Sort the objects by path hash so like files are near each other, + // and then by size descending so that bigger files are first. This + // applies "Linus' Law" which states that newer files tend to be the + // bigger ones, because source files grow and hardly ever shrink. + // + Arrays.sort(list, 0, cnt, new Comparator() { + public int compare(ObjectToPack a, ObjectToPack b) { + int cmp = (a.isDoNotDelta() ? 1 : 0) + - (b.isDoNotDelta() ? 1 : 0); + if (cmp != 0) + return cmp; + + cmp = a.getType() - b.getType(); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1); + if (cmp != 0) + return cmp; + + cmp = (a.isEdge() ? 0 : 1) - (b.isEdge() ? 0 : 1); + if (cmp != 0) + return cmp; + + return b.getWeight() - a.getWeight(); + } + }); + + // Above we stored the objects we cannot delta onto the end. + // Remove them from the list so we don't waste time on them. 
+ while (0 < cnt && list[cnt - 1].isDoNotDelta()) { + if (!list[cnt - 1].isEdge()) + nonEdgeCnt--; + cnt--; + } + if (cnt == 0) + return; + + final long searchStart = System.currentTimeMillis(); + beginPhase(PackingPhase.COMPRESSING, monitor, nonEdgeCnt); + searchForDeltas(monitor, list, cnt); + endPhase(monitor); + stats.deltaSearchNonEdgeObjects = nonEdgeCnt; + stats.timeCompressing = System.currentTimeMillis() - searchStart; + + for (int i = 0; i < cnt; i++) + if (!list[i].isEdge() && list[i].isDeltaRepresentation()) + stats.deltasFound++; + } + + private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) { + for (ObjectToPack otp : objectsLists[type]) { + if (otp.isDoNotDelta()) // delta is disabled for this path + continue; + if (otp.isDeltaRepresentation()) // already reusing a delta + continue; + otp.setWeight(0); + list[cnt++] = otp; + } + return cnt; + } + + private void reselectNonDelta(ObjectToPack otp) throws IOException { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + boolean old = reuseDeltas; + reuseDeltas = false; + reuseSupport.selectObjectRepresentation(this, + NullProgressMonitor.INSTANCE, + Collections.singleton(otp)); + reuseDeltas = old; + } + + private void searchForDeltas(final ProgressMonitor monitor, + final ObjectToPack[] list, final int cnt) + throws MissingObjectException, IncorrectObjectTypeException, + LargeObjectException, IOException { + int threads = config.getThreads(); + if (threads == 0) + threads = Runtime.getRuntime().availableProcessors(); + + if (threads <= 1 || cnt <= 2 * config.getDeltaSearchWindowSize()) { + new DeltaWindow(config, new DeltaCache(config), reader, monitor, + list, 0, cnt).search(); + return; + } + + final DeltaCache dc = new ThreadSafeDeltaCache(config); + final ThreadSafeProgressMonitor pm = new ThreadSafeProgressMonitor(monitor); + + DeltaTask.Block taskBlock = new DeltaTask.Block(threads, config, + reader, dc, pm, + list, 0, cnt); + taskBlock.partitionTasks(); + pm.startWorkers(taskBlock.tasks.size()); + + final Executor executor = config.getExecutor(); + final List errors = Collections + .synchronizedList(new ArrayList()); + if (executor instanceof ExecutorService) { + // Caller supplied us a service, use it directly. + runTasks((ExecutorService) executor, pm, taskBlock, errors); + } else if (executor == null) { + // Caller didn't give us a way to run the tasks, spawn up a + // temporary thread pool and make sure it tears down cleanly. + ExecutorService pool = Executors.newFixedThreadPool(threads); + try { + runTasks(pool, pm, taskBlock, errors); + } finally { + pool.shutdown(); + for (;;) { + try { + if (pool.awaitTermination(60, TimeUnit.SECONDS)) + break; + } catch (InterruptedException e) { + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + } + } else { + // The caller gave us an executor, but it might not do + // asynchronous execution. Wrap everything and hope it + // can schedule these for us. + for (final DeltaTask task : taskBlock.tasks) { + executor.execute(new Runnable() { + public void run() { + try { + task.call(); + } catch (Throwable failure) { + errors.add(failure); + } + } + }); + } + try { + pm.waitForCompletion(); + } catch (InterruptedException ie) { + // We can't abort the other tasks as we have no handle. + // Cross our fingers and just break out anyway. 
+ // + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + + // If any task threw an error, try to report it back as + // though we weren't using a threaded search algorithm. + // + if (!errors.isEmpty()) { + Throwable err = errors.get(0); + if (err instanceof Error) + throw (Error) err; + if (err instanceof RuntimeException) + throw (RuntimeException) err; + if (err instanceof IOException) + throw (IOException) err; + + IOException fail = new IOException(err.getMessage()); + fail.initCause(err); + throw fail; + } + } + + private static void runTasks(ExecutorService pool, + ThreadSafeProgressMonitor pm, + DeltaTask.Block tb, List errors) throws IOException { + List> futures = new ArrayList>(tb.tasks.size()); + for (DeltaTask task : tb.tasks) + futures.add(pool.submit(task)); + + try { + pm.waitForCompletion(); + for (Future f : futures) { + try { + f.get(); + } catch (ExecutionException failed) { + errors.add(failed.getCause()); + } + } + } catch (InterruptedException ie) { + for (Future f : futures) + f.cancel(true); + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + + private void writeObjects(PackOutputStream out) throws IOException { + writeObjects(out, objectsLists[OBJ_COMMIT]); + writeObjects(out, objectsLists[OBJ_TAG]); + writeObjects(out, objectsLists[OBJ_TREE]); + writeObjects(out, objectsLists[OBJ_BLOB]); + } + + private void writeObjects(PackOutputStream out, List list) + throws IOException { + if (list.isEmpty()) + return; + + typeStats = stats.objectTypes[list.get(0).getType()]; + long beginOffset = out.length(); + + if (reuseSupport != null) { + reuseSupport.writeObjects(out, list); + } else { + for (ObjectToPack otp : list) + out.writeObject(otp); + } + + typeStats.bytes += out.length() - beginOffset; + typeStats.cntObjects = list.size(); + } + + void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException { + if (!otp.isWritten()) + writeObjectImpl(out, otp); + } + + private void writeObjectImpl(PackOutputStream out, ObjectToPack otp) + throws IOException { + if (otp.wantWrite()) { + // A cycle exists in this delta chain. This should only occur if a + // selected object representation disappeared during writing + // (for example due to a concurrent repack) and a different base + // was chosen, forcing a cycle. Select something other than a + // delta, and write this object. + reselectNonDelta(otp); + } + otp.markWantWrite(); + + while (otp.isReuseAsIs()) { + writeBase(out, otp.getDeltaBase()); + if (otp.isWritten()) + return; // Delta chain cycle caused this to write already. + + crc32.reset(); + otp.setOffset(out.length()); + try { + reuseSupport.copyObjectAsIs(out, otp, reuseValidate); + out.endObject(); + otp.setCRC((int) crc32.getValue()); + typeStats.reusedObjects++; + if (otp.isDeltaRepresentation()) { + typeStats.reusedDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); + } + return; + } catch (StoredObjectRepresentationNotAvailableException gone) { + if (otp.getOffset() == out.length()) { + otp.setOffset(0); + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + reuseSupport.selectObjectRepresentation(this, + NullProgressMonitor.INSTANCE, + Collections.singleton(otp)); + continue; + } else { + // Object writing already started, we cannot recover. + // + CorruptObjectException coe; + coe = new CorruptObjectException(otp, ""); //$NON-NLS-1$ + coe.initCause(gone); + throw coe; + } + } + } + + // If we reached here, reuse wasn't possible. 
+ // + if (otp.isDeltaRepresentation()) + writeDeltaObjectDeflate(out, otp); + else + writeWholeObjectDeflate(out, otp); + out.endObject(); + otp.setCRC((int) crc32.getValue()); + } + + private void writeBase(PackOutputStream out, ObjectToPack base) + throws IOException { + if (base != null && !base.isWritten() && !base.isEdge()) + writeObjectImpl(out, base); + } + + private void writeWholeObjectDeflate(PackOutputStream out, + final ObjectToPack otp) throws IOException { + final Deflater deflater = deflater(); + final ObjectLoader ldr = reader.open(otp, otp.getType()); + + crc32.reset(); + otp.setOffset(out.length()); + out.writeHeader(otp, ldr.getSize()); + + deflater.reset(); + DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); + ldr.copyTo(dst); + dst.finish(); + } + + private void writeDeltaObjectDeflate(PackOutputStream out, + final ObjectToPack otp) throws IOException { + writeBase(out, otp.getDeltaBase()); + + crc32.reset(); + otp.setOffset(out.length()); + + DeltaCache.Ref ref = otp.popCachedDelta(); + if (ref != null) { + byte[] zbuf = ref.get(); + if (zbuf != null) { + out.writeHeader(otp, otp.getCachedSize()); + out.write(zbuf); + return; + } + } + + TemporaryBuffer.Heap delta = delta(otp); + out.writeHeader(otp, delta.length()); + + Deflater deflater = deflater(); + deflater.reset(); + DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); + delta.writeTo(dst, null); + dst.finish(); + typeStats.cntDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); + } + + private TemporaryBuffer.Heap delta(final ObjectToPack otp) + throws IOException { + DeltaIndex index = new DeltaIndex(buffer(otp.getDeltaBaseId())); + byte[] res = buffer(otp); + + // We never would have proposed this pair if the delta would be + // larger than the unpacked version of the object. So using it + // as our buffer limit is valid: we will never reach it. + // + TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length); + index.encode(delta, res); + return delta; + } + + private byte[] buffer(AnyObjectId objId) throws IOException { + return buffer(config, reader, objId); + } + + static byte[] buffer(PackConfig config, ObjectReader or, AnyObjectId objId) + throws IOException { + // PackWriter should have already pruned objects that + // are above the big file threshold, so our chances of + // the object being below it are very good. We really + // shouldn't be here, unless the implementation is odd. 
+ + return or.open(objId).getCachedBytes(config.getBigFileThreshold()); + } + + private Deflater deflater() { + if (myDeflater == null) + myDeflater = new Deflater(config.getCompressionLevel()); + return myDeflater; + } + + private void writeChecksum(PackOutputStream out) throws IOException { + packcsum = out.getDigest(); + out.write(packcsum); + } + + private void findObjectsToPack(final ProgressMonitor countingMonitor, + final ObjectWalk walker, final Set want, + Set have) + throws MissingObjectException, IOException, + IncorrectObjectTypeException { + final long countingStart = System.currentTimeMillis(); + beginPhase(PackingPhase.COUNTING, countingMonitor, ProgressMonitor.UNKNOWN); + + if (have == null) + have = Collections.emptySet(); + + stats.interestingObjects = Collections.unmodifiableSet(new HashSet(want)); + stats.uninterestingObjects = Collections.unmodifiableSet(new HashSet(have)); + + walker.setRetainBody(false); + + canBuildBitmaps = config.isBuildBitmaps() + && !shallowPack + && have.isEmpty() + && (excludeInPacks == null || excludeInPacks.length == 0); + if (!shallowPack && useBitmaps) { + BitmapIndex bitmapIndex = reader.getBitmapIndex(); + if (bitmapIndex != null) { + PackWriterBitmapWalker bitmapWalker = new PackWriterBitmapWalker( + walker, bitmapIndex, countingMonitor); + findObjectsToPackUsingBitmaps(bitmapWalker, want, have); + endPhase(countingMonitor); + stats.timeCounting = System.currentTimeMillis() - countingStart; + return; + } + } + + List all = new ArrayList(want.size() + have.size()); + all.addAll(want); + all.addAll(have); + + final RevFlag include = walker.newFlag("include"); //$NON-NLS-1$ + final RevFlag added = walker.newFlag("added"); //$NON-NLS-1$ + + walker.carry(include); + + int haveEst = have.size(); + if (have.isEmpty()) { + walker.sort(RevSort.COMMIT_TIME_DESC); + } else { + walker.sort(RevSort.TOPO); + if (thin) + walker.sort(RevSort.BOUNDARY, true); + } + + List wantObjs = new ArrayList(want.size()); + List haveObjs = new ArrayList(haveEst); + List wantTags = new ArrayList(want.size()); + + AsyncRevObjectQueue q = walker.parseAny(all, true); + try { + for (;;) { + try { + RevObject o = q.next(); + if (o == null) + break; + if (have.contains(o)) + haveObjs.add(o); + if (want.contains(o)) { + o.add(include); + wantObjs.add(o); + if (o instanceof RevTag) + wantTags.add((RevTag) o); + } + } catch (MissingObjectException e) { + if (ignoreMissingUninteresting + && have.contains(e.getObjectId())) + continue; + throw e; + } + } + } finally { + q.release(); + } + + if (!wantTags.isEmpty()) { + all = new ArrayList(wantTags.size()); + for (RevTag tag : wantTags) + all.add(tag.getObject()); + q = walker.parseAny(all, true); + try { + while (q.next() != null) { + // Just need to pop the queue item to parse the object. 
+ } + } finally { + q.release(); + } + } + + if (walker instanceof DepthWalk.ObjectWalk) { + DepthWalk.ObjectWalk depthWalk = (DepthWalk.ObjectWalk) walker; + for (RevObject obj : wantObjs) + depthWalk.markRoot(obj); + if (unshallowObjects != null) { + for (ObjectId id : unshallowObjects) + depthWalk.markUnshallow(walker.parseAny(id)); + } + } else { + for (RevObject obj : wantObjs) + walker.markStart(obj); + } + for (RevObject obj : haveObjs) + walker.markUninteresting(obj); + + final int maxBases = config.getDeltaSearchWindowSize(); + Set baseTrees = new HashSet(); + BlockList commits = new BlockList(); + RevCommit c; + while ((c = walker.next()) != null) { + if (exclude(c)) + continue; + if (c.has(RevFlag.UNINTERESTING)) { + if (baseTrees.size() <= maxBases) + baseTrees.add(c.getTree()); + continue; + } + + commits.add(c); + countingMonitor.update(1); + } + + if (shallowPack) { + for (RevCommit cmit : commits) { + addObject(cmit, 0); + } + } else { + int commitCnt = 0; + boolean putTagTargets = false; + for (RevCommit cmit : commits) { + if (!cmit.has(added)) { + cmit.add(added); + addObject(cmit, 0); + commitCnt++; + } + + for (int i = 0; i < cmit.getParentCount(); i++) { + RevCommit p = cmit.getParent(i); + if (!p.has(added) && !p.has(RevFlag.UNINTERESTING) + && !exclude(p)) { + p.add(added); + addObject(p, 0); + commitCnt++; + } + } + + if (!putTagTargets && 4096 < commitCnt) { + for (ObjectId id : tagTargets) { + RevObject obj = walker.lookupOrNull(id); + if (obj instanceof RevCommit + && obj.has(include) + && !obj.has(RevFlag.UNINTERESTING) + && !obj.has(added)) { + obj.add(added); + addObject(obj, 0); + } + } + putTagTargets = true; + } + } + } + commits = null; + + if (thin && !baseTrees.isEmpty()) { + BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, // + objectsMap, edgeObjects, reader); + RevObject o; + while ((o = walker.nextObject()) != null) { + if (o.has(RevFlag.UNINTERESTING)) + continue; + if (exclude(o)) + continue; + + int pathHash = walker.getPathHashCode(); + byte[] pathBuf = walker.getPathBuffer(); + int pathLen = walker.getPathLength(); + bases.addBase(o.getType(), pathBuf, pathLen, pathHash); + addObject(o, pathHash); + countingMonitor.update(1); + } + } else { + RevObject o; + while ((o = walker.nextObject()) != null) { + if (o.has(RevFlag.UNINTERESTING)) + continue; + if (exclude(o)) + continue; + addObject(o, walker.getPathHashCode()); + countingMonitor.update(1); + } + } + + for (CachedPack pack : cachedPacks) + countingMonitor.update((int) pack.getObjectCount()); + endPhase(countingMonitor); + stats.timeCounting = System.currentTimeMillis() - countingStart; + } + + private void findObjectsToPackUsingBitmaps( + PackWriterBitmapWalker bitmapWalker, Set want, + Set have) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + BitmapBuilder haveBitmap = bitmapWalker.findObjects(have, null); + bitmapWalker.reset(); + BitmapBuilder wantBitmap = bitmapWalker.findObjects(want, haveBitmap); + BitmapBuilder needBitmap = wantBitmap.andNot(haveBitmap); + + if (useCachedPacks && reuseSupport != null + && (excludeInPacks == null || excludeInPacks.length == 0)) + cachedPacks.addAll( + reuseSupport.getCachedPacksAndUpdate(needBitmap)); + + for (BitmapObject obj : needBitmap) { + ObjectId objectId = obj.getObjectId(); + if (exclude(objectId)) { + needBitmap.remove(objectId); + continue; + } + addObject(objectId, obj.getType(), 0); + } + + if (thin) + haveObjects = haveBitmap; + } + + private static void pruneEdgesFromObjectList(List list) 
{ + final int size = list.size(); + int src = 0; + int dst = 0; + + for (; src < size; src++) { + ObjectToPack obj = list.get(src); + if (obj.isEdge()) + continue; + if (dst != src) + list.set(dst, obj); + dst++; + } + + while (dst < list.size()) + list.remove(list.size() - 1); + } + + /** + * Include one object to the output file. + *
<p>
+ * Objects are written in the order they are added. If the same object is + * added twice, it may be written twice, creating a larger than necessary + * file. + * + * @param object + * the object to add. + * @throws IncorrectObjectTypeException + * the object is an unsupported type. + */ + public void addObject(final RevObject object) + throws IncorrectObjectTypeException { + if (!exclude(object)) + addObject(object, 0); + } + + private void addObject(final RevObject object, final int pathHashCode) { + addObject(object, object.getType(), pathHashCode); + } + + private void addObject( + final AnyObjectId src, final int type, final int pathHashCode) { + final ObjectToPack otp; + if (reuseSupport != null) + otp = reuseSupport.newObjectToPack(src, type); + else + otp = new ObjectToPack(src, type); + otp.setPathHash(pathHashCode); + objectsLists[type].add(otp); + objectsMap.add(otp); + } + + private boolean exclude(AnyObjectId objectId) { + if (excludeInPacks == null) + return false; + if (excludeInPackLast.contains(objectId)) + return true; + for (ObjectIdSet idx : excludeInPacks) { + if (idx.contains(objectId)) { + excludeInPackLast = idx; + return true; + } + } + return false; + } + + /** + * Select an object representation for this writer. + *
<p>
+ * An {@link ObjectReader} implementation should invoke this method once for + * each representation available for an object, to allow the writer to find + * the most suitable one for the output. + * + * @param otp + * the object being packed. + * @param next + * the next available representation from the repository. + */ + public void select(ObjectToPack otp, StoredObjectRepresentation next) { + int nFmt = next.getFormat(); + + if (!cachedPacks.isEmpty()) { + if (otp.isEdge()) + return; + if ((nFmt == PACK_WHOLE) | (nFmt == PACK_DELTA)) { + for (CachedPack pack : cachedPacks) { + if (pack.hasObject(otp, next)) { + otp.setEdge(); + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + pruneCurrentObjectList = true; + return; + } + } + } + } + + if (nFmt == PACK_DELTA && reuseDeltas && reuseDeltaFor(otp)) { + ObjectId baseId = next.getDeltaBase(); + ObjectToPack ptr = objectsMap.get(baseId); + if (ptr != null && !ptr.isEdge()) { + otp.setDeltaBase(ptr); + otp.setReuseAsIs(); + } else if (thin && have(ptr, baseId)) { + otp.setDeltaBase(baseId); + otp.setReuseAsIs(); + } else { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + } + } else if (nFmt == PACK_WHOLE && config.isReuseObjects()) { + int nWeight = next.getWeight(); + if (otp.isReuseAsIs() && !otp.isDeltaRepresentation()) { + // We've chosen another PACK_WHOLE format for this object, + // choose the one that has the smaller compressed size. + // + if (otp.getWeight() <= nWeight) + return; + } + otp.clearDeltaBase(); + otp.setReuseAsIs(); + otp.setWeight(nWeight); + } else { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + } + + otp.setDeltaAttempted(reuseDeltas & next.wasDeltaAttempted()); + otp.select(next); + } + + private final boolean have(ObjectToPack ptr, AnyObjectId objectId) { + return (ptr != null && ptr.isEdge()) + || (haveObjects != null && haveObjects.contains(objectId)); + } + + /** + * Prepares the bitmaps to be written to the pack index. Bitmaps can be used + * to speed up fetches and clones by storing the entire object graph at + * selected commits. + * + * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. Writing a corresponding bitmap + * index is an optional feature that not all pack users may require. + * + * @param pm + * progress monitor to report bitmap building work. + * @return whether a bitmap index may be written. + * @throws IOException + * when some I/O problem occur during reading objects. 
+ */ + public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { + if (!canBuildBitmaps || getObjectCount() > Integer.MAX_VALUE + || !cachedPacks.isEmpty()) + return false; + + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + + writeBitmaps = new PackBitmapIndexBuilder(sortByName()); + PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer( + reader, writeBitmaps, pm, stats.interestingObjects); + + int numCommits = objectsLists[OBJ_COMMIT].size(); + Collection selectedCommits = + bitmapPreparer.doCommitSelection(numCommits); + + beginPhase(PackingPhase.BUILDING_BITMAPS, pm, selectedCommits.size()); + + PackWriterBitmapWalker walker = bitmapPreparer.newBitmapWalker(); + AnyObjectId last = null; + for (PackWriterBitmapPreparer.BitmapCommit cmit : selectedCommits) { + if (cmit.isReuseWalker()) + walker.reset(); + else + walker = bitmapPreparer.newBitmapWalker(); + + BitmapBuilder bitmap = walker.findObjects( + Collections.singleton(cmit), null); + + if (last != null && cmit.isReuseWalker() && !bitmap.contains(last)) + throw new IllegalStateException(MessageFormat.format( + JGitText.get().bitmapMissingObject, cmit.name(), + last.name())); + last = cmit; + writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags()); + + pm.update(1); + } + + endPhase(pm); + return true; + } + + private boolean reuseDeltaFor(ObjectToPack otp) { + int type = otp.getType(); + if ((type & 2) != 0) // OBJ_TREE(2) or OBJ_BLOB(3) + return true; + if (type == OBJ_COMMIT) + return reuseDeltaCommits; + if (type == OBJ_TAG) + return false; + return true; + } + + /** Summary of how PackWriter created the pack. */ + public static class Statistics { + /** Statistics about a single class of object. */ + public static class ObjectType { + long cntObjects; + + long cntDeltas; + + long reusedObjects; + + long reusedDeltas; + + long bytes; + + long deltaBytes; + + /** + * @return total number of objects output. This total includes the + * value of {@link #getDeltas()}. + */ + public long getObjects() { + return cntObjects; + } + + /** + * @return total number of deltas output. This may be lower than the + * actual number of deltas if a cached pack was reused. + */ + public long getDeltas() { + return cntDeltas; + } + + /** + * @return number of objects whose existing representation was + * reused in the output. This count includes + * {@link #getReusedDeltas()}. + */ + public long getReusedObjects() { + return reusedObjects; + } + + /** + * @return number of deltas whose existing representation was reused + * in the output, as their base object was also output or + * was assumed present for a thin pack. This may be lower + * than the actual number of reused deltas if a cached pack + * was reused. + */ + public long getReusedDeltas() { + return reusedDeltas; + } + + /** + * @return total number of bytes written. This size includes the + * object headers as well as the compressed data. This size + * also includes all of {@link #getDeltaBytes()}. + */ + public long getBytes() { + return bytes; + } + + /** + * @return number of delta bytes written. This size includes the + * object headers for the delta objects. 
+ */ + public long getDeltaBytes() { + return deltaBytes; + } + } + + Set interestingObjects; + + Set uninterestingObjects; + + Collection reusedPacks; + + int depth; + + int deltaSearchNonEdgeObjects; + + int deltasFound; + + long totalObjects; + + long totalDeltas; + + long reusedObjects; + + long reusedDeltas; + + long totalBytes; + + long thinPackBytes; + + long timeCounting; + + long timeSearchingForReuse; + + long timeSearchingForSizes; + + long timeCompressing; + + long timeWriting; + + ObjectType[] objectTypes; + + { + objectTypes = new ObjectType[5]; + objectTypes[OBJ_COMMIT] = new ObjectType(); + objectTypes[OBJ_TREE] = new ObjectType(); + objectTypes[OBJ_BLOB] = new ObjectType(); + objectTypes[OBJ_TAG] = new ObjectType(); + } + + /** + * @return unmodifiable collection of objects to be included in the + * pack. May be null if the pack was hand-crafted in a unit + * test. + */ + public Set getInterestingObjects() { + return interestingObjects; + } + + /** + * @return unmodifiable collection of objects that should be excluded + * from the pack, as the peer that will receive the pack already + * has these objects. + */ + public Set getUninterestingObjects() { + return uninterestingObjects; + } + + /** + * @return unmodifiable collection of the cached packs that were reused + * in the output, if any were selected for reuse. + */ + public Collection getReusedPacks() { + return reusedPacks; + } + + /** + * @return number of objects in the output pack that went through the + * delta search process in order to find a potential delta base. + */ + public int getDeltaSearchNonEdgeObjects() { + return deltaSearchNonEdgeObjects; + } + + /** + * @return number of objects in the output pack that went through delta + * base search and found a suitable base. This is a subset of + * {@link #getDeltaSearchNonEdgeObjects()}. + */ + public int getDeltasFound() { + return deltasFound; + } + + /** + * @return total number of objects output. This total includes the value + * of {@link #getTotalDeltas()}. + */ + public long getTotalObjects() { + return totalObjects; + } + + /** + * @return total number of deltas output. This may be lower than the + * actual number of deltas if a cached pack was reused. + */ + public long getTotalDeltas() { + return totalDeltas; + } + + /** + * @return number of objects whose existing representation was reused in + * the output. This count includes {@link #getReusedDeltas()}. + */ + public long getReusedObjects() { + return reusedObjects; + } + + /** + * @return number of deltas whose existing representation was reused in + * the output, as their base object was also output or was + * assumed present for a thin pack. This may be lower than the + * actual number of reused deltas if a cached pack was reused. + */ + public long getReusedDeltas() { + return reusedDeltas; + } + + /** + * @return total number of bytes written. This size includes the pack + * header, trailer, thin pack, and reused cached pack(s). + */ + public long getTotalBytes() { + return totalBytes; + } + + /** + * @return size of the thin pack in bytes, if a thin pack was generated. + * A thin pack is created when the client already has objects + * and some deltas are created against those objects, or if a + * cached pack is being used and some deltas will reference + * objects in the cached pack. This size does not include the + * pack header or trailer. + */ + public long getThinPackBytes() { + return thinPackBytes; + } + + /** + * @param typeCode + * object type code, e.g. OBJ_COMMIT or OBJ_TREE. 
+ * @return information about this type of object in the pack. + */ + public ObjectType byObjectType(int typeCode) { + return objectTypes[typeCode]; + } + + /** @return true if the resulting pack file was a shallow pack. */ + public boolean isShallow() { + return depth > 0; + } + + /** @return depth (in commits) the pack includes if shallow. */ + public int getDepth() { + return depth; + } + + /** + * @return time in milliseconds spent enumerating the objects that need + * to be included in the output. This time includes any restarts + * that occur when a cached pack is selected for reuse. + */ + public long getTimeCounting() { + return timeCounting; + } + + /** + * @return time in milliseconds spent matching existing representations + * against objects that will be transmitted, or that the client + * can be assumed to already have. + */ + public long getTimeSearchingForReuse() { + return timeSearchingForReuse; + } + + /** + * @return time in milliseconds spent finding the sizes of all objects + * that will enter the delta compression search window. The + * sizes need to be known to better match similar objects + * together and improve delta compression ratios. + */ + public long getTimeSearchingForSizes() { + return timeSearchingForSizes; + } + + /** + * @return time in milliseconds spent on delta compression. This is + * observed wall-clock time and does not accurately track CPU + * time used when multiple threads were used to perform the + * delta compression. + */ + public long getTimeCompressing() { + return timeCompressing; + } + + /** + * @return time in milliseconds spent writing the pack output, from + * start of header until end of trailer. The transfer speed can + * be approximated by dividing {@link #getTotalBytes()} by this + * value. + */ + public long getTimeWriting() { + return timeWriting; + } + + /** @return total time spent processing this pack. */ + public long getTimeTotal() { + return timeCounting + + timeSearchingForReuse + + timeSearchingForSizes + + timeCompressing + + timeWriting; + } + + /** + * @return get the average output speed in terms of bytes-per-second. + * {@code getTotalBytes() / (getTimeWriting() / 1000.0)}. + */ + public double getTransferRate() { + return getTotalBytes() / (getTimeWriting() / 1000.0); + } + + /** @return formatted message string for display to clients. */ + public String getMessage() { + return MessageFormat.format(JGitText.get().packWriterStatistics, // + Long.valueOf(totalObjects), Long.valueOf(totalDeltas), // + Long.valueOf(reusedObjects), Long.valueOf(reusedDeltas)); + } + } + + private class MutableState { + /** Estimated size of a single ObjectToPack instance. */ + // Assume 64-bit pointers, since this is just an estimate. 
+ private static final long OBJECT_TO_PACK_SIZE = + (2 * 8) // Object header + + (2 * 8) + (2 * 8) // ObjectToPack fields + + (8 + 8) // PackedObjectInfo fields + + 8 // ObjectIdOwnerMap fields + + 40 // AnyObjectId fields + + 8; // Reference in BlockList + + private final long totalDeltaSearchBytes; + + private volatile PackingPhase phase; + + MutableState() { + phase = PackingPhase.COUNTING; + if (config.isDeltaCompress()) { + int threads = config.getThreads(); + if (threads <= 0) + threads = Runtime.getRuntime().availableProcessors(); + totalDeltaSearchBytes = (threads * config.getDeltaSearchMemoryLimit()) + + config.getBigFileThreshold(); + } else + totalDeltaSearchBytes = 0; + } + + State snapshot() { + long objCnt = 0; + objCnt += objectsLists[OBJ_COMMIT].size(); + objCnt += objectsLists[OBJ_TREE].size(); + objCnt += objectsLists[OBJ_BLOB].size(); + objCnt += objectsLists[OBJ_TAG].size(); + // Exclude CachedPacks. + + long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt; + PackingPhase curr = phase; + if (curr == PackingPhase.COMPRESSING) + bytesUsed += totalDeltaSearchBytes; + return new State(curr, bytesUsed); + } + } + + /** Possible states that a PackWriter can be in. */ + public static enum PackingPhase { + /** Counting objects phase. */ + COUNTING, + + /** Getting sizes phase. */ + GETTING_SIZES, + + /** Finding sources phase. */ + FINDING_SOURCES, + + /** Compressing objects phase. */ + COMPRESSING, + + /** Writing objects phase. */ + WRITING, + + /** Building bitmaps phase. */ + BUILDING_BITMAPS; + } + + /** Summary of the current state of a PackWriter. */ + public class State { + private final PackingPhase phase; + + private final long bytesUsed; + + State(PackingPhase phase, long bytesUsed) { + this.phase = phase; + this.bytesUsed = bytesUsed; + } + + /** @return the PackConfig used to build the writer. */ + public PackConfig getConfig() { + return config; + } + + /** @return the current phase of the writer. */ + public PackingPhase getPhase() { + return phase; + } + + /** @return an estimate of the total memory used by the writer. 
*/ + public long estimateBytesUsed() { + return bytesUsed; + } + + @SuppressWarnings("nls") + @Override + public String toString() { + return "PackWriter.State[" + phase + ", memory=" + bytesUsed + "]"; + } + } +} diff --git a/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3.json b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3.json new file mode 100644 index 000000000..b6495da3f --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3.json @@ -0,0 +1 @@ +{"parentCommitId":"21e4aa2b9eaf392825e52ada6034cc3044c69c67","currentCommitId":"5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3","filesBefore":["org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java"],"filesCurrent":["org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java"],"renamedFilesHint":{},"repositoryDirectoriesBefore":["org.eclipse.jgit/src/org/eclipse/jgit","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage","org.eclipse.jgit/src","org.eclipse.jgit/src/org","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack","org.eclipse.jgit/src/org/eclipse","org.eclipse.jgit","org.eclipse.jgit/src/org/eclipse/jgit/internal"],"repositoryDirectoriesCurrent":["org.eclipse.jgit/src/org/eclipse/jgit","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage","org.eclipse.jgit/src","org.eclipse.jgit/src/org","org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack","org.eclipse.jgit/src/org/eclipse","org.eclipse.jgit","org.eclipse.jgit/src/org/eclipse/jgit/internal"],"commitTime":0,"authoredTime":0,"commitAuthorName":null} \ No newline at end of file diff --git a/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java new file mode 100644 index 000000000..c4b01949d --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java @@ -0,0 +1,332 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. 
nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.pack; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.Callable; + +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; +import org.eclipse.jgit.storage.pack.PackConfig; + +final class DeltaTask implements Callable { + static final long MAX_METER = 9 << 20; + + static final class Block { + private static final int MIN_TOP_PATH = 50 << 20; + + final List tasks; + final int threads; + final PackConfig config; + final ObjectReader templateReader; + final DeltaCache dc; + final ThreadSafeProgressMonitor pm; + final ObjectToPack[] list; + final int beginIndex; + final int endIndex; + + private long totalWeight; + private long bytesPerUnit; + + Block(int threads, PackConfig config, ObjectReader reader, + DeltaCache dc, ThreadSafeProgressMonitor pm, + ObjectToPack[] list, int begin, int end) { + this.tasks = new ArrayList(threads); + this.threads = threads; + this.config = config; + this.templateReader = reader; + this.dc = dc; + this.pm = pm; + this.list = list; + this.beginIndex = begin; + this.endIndex = end; + } + + int cost() { + int d = (int) (totalWeight / bytesPerUnit); + if (totalWeight % bytesPerUnit != 0) + d++; + return d; + } + + synchronized DeltaWindow stealWork(DeltaTask forThread) { + for (;;) { + DeltaTask maxTask = null; + Slice maxSlice = null; + int maxWork = 0; + + for (DeltaTask task : tasks) { + Slice s = task.remaining(); + if (s != null && maxWork < s.size()) { + maxTask = task; + maxSlice = s; + maxWork = s.size(); + } + } + if (maxTask == null) + return null; + if (maxTask.tryStealWork(maxSlice)) + return forThread.initWindow(maxSlice); + } + } + + void partitionTasks() { + ArrayList topPaths = computeTopPaths(); + Iterator topPathItr = topPaths.iterator(); + int nextTop = 0; + long weightPerThread = totalWeight / threads; + for (int i = beginIndex; i < endIndex;) { + DeltaTask task = new DeltaTask(this); + long w = 0; + + // Assign the thread one top path. + if (topPathItr.hasNext()) { + WeightedPath p = topPathItr.next(); + w += p.weight; + task.add(p.slice); + } + + // Assign the task thread ~average weight. 
+ int s = i; + for (; w < weightPerThread && i < endIndex;) { + if (nextTop < topPaths.size() + && i == topPaths.get(nextTop).slice.beginIndex) { + if (s < i) + task.add(new Slice(s, i)); + s = i = topPaths.get(nextTop++).slice.endIndex; + } else + w += list[i++].getWeight(); + } + + // Round up the slice to the end of a path. + if (s < i) { + int h = list[i - 1].getPathHash(); + while (i < endIndex) { + if (h == list[i].getPathHash()) + i++; + else + break; + } + task.add(new Slice(s, i)); + } + if (!task.slices.isEmpty()) + tasks.add(task); + } + while (topPathItr.hasNext()) { + WeightedPath p = topPathItr.next(); + DeltaTask task = new DeltaTask(this); + task.add(p.slice); + tasks.add(task); + } + + topPaths = null; + } + + private ArrayList computeTopPaths() { + ArrayList topPaths = new ArrayList( + threads); + int cp = beginIndex; + int ch = list[cp].getPathHash(); + long cw = list[cp].getWeight(); + totalWeight = list[cp].getWeight(); + + for (int i = cp + 1; i < endIndex; i++) { + ObjectToPack o = list[i]; + if (ch != o.getPathHash()) { + if (MIN_TOP_PATH < cw) { + if (topPaths.size() < threads) { + Slice s = new Slice(cp, i); + topPaths.add(new WeightedPath(cw, s)); + if (topPaths.size() == threads) + Collections.sort(topPaths); + } else if (topPaths.get(0).weight < cw) { + Slice s = new Slice(cp, i); + WeightedPath p = new WeightedPath(cw, s); + topPaths.set(0, p); + if (p.compareTo(topPaths.get(1)) > 0) + Collections.sort(topPaths); + } + } + cp = i; + ch = o.getPathHash(); + cw = 0; + } + if (o.isEdge() || o.doNotAttemptDelta()) + continue; + cw += o.getWeight(); + totalWeight += o.getWeight(); + } + + // Sort by starting index to identify gaps later. + Collections.sort(topPaths, new Comparator() { + public int compare(WeightedPath a, WeightedPath b) { + return a.slice.beginIndex - b.slice.beginIndex; + } + }); + + bytesPerUnit = 1; + while (MAX_METER <= (totalWeight / bytesPerUnit)) + bytesPerUnit <<= 10; + return topPaths; + } + } + + static final class WeightedPath implements Comparable { + final long weight; + final Slice slice; + + WeightedPath(long weight, Slice s) { + this.weight = weight; + this.slice = s; + } + + public int compareTo(WeightedPath o) { + int cmp = Long.signum(weight - o.weight); + if (cmp != 0) + return cmp; + return slice.beginIndex - o.slice.beginIndex; + } + } + + static final class Slice { + final int beginIndex; + final int endIndex; + + Slice(int b, int e) { + beginIndex = b; + endIndex = e; + } + + final int size() { + return endIndex - beginIndex; + } + } + + private final Block block; + private final LinkedList slices; + + private ObjectReader or; + private DeltaWindow dw; + + DeltaTask(Block b) { + this.block = b; + this.slices = new LinkedList(); + } + + void add(Slice s) { + if (!slices.isEmpty()) { + Slice last = slices.getLast(); + if (last.endIndex == s.beginIndex) { + slices.removeLast(); + slices.add(new Slice(last.beginIndex, s.endIndex)); + return; + } + } + slices.add(s); + } + + public Object call() throws Exception { + or = block.templateReader.newReader(); + try { + DeltaWindow w; + for (;;) { + synchronized (this) { + if (slices.isEmpty()) + break; + w = initWindow(slices.removeFirst()); + } + runWindow(w); + } + while ((w = block.stealWork(this)) != null) + runWindow(w); + } finally { + block.pm.endWorker(); + or.release(); + or = null; + } + return null; + } + + DeltaWindow initWindow(Slice s) { + DeltaWindow w = new DeltaWindow(block.config, block.dc, + or, block.pm, block.bytesPerUnit, + block.list, s.beginIndex, s.endIndex); 
+ synchronized (this) { + dw = w; + } + return w; + } + + private void runWindow(DeltaWindow w) throws IOException { + try { + w.search(); + } finally { + synchronized (this) { + dw = null; + } + } + } + + synchronized Slice remaining() { + if (!slices.isEmpty()) + return slices.getLast(); + DeltaWindow d = dw; + return d != null ? d.remaining() : null; + } + + synchronized boolean tryStealWork(Slice s) { + if (!slices.isEmpty() && slices.getLast().beginIndex == s.beginIndex) { + slices.removeLast(); + return true; + } + DeltaWindow d = dw; + return d != null ? d.tryStealWork(s) : false; + } +} diff --git a/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java new file mode 100644 index 000000000..19d06a23f --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java @@ -0,0 +1,511 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.internal.storage.pack; + +import java.io.EOFException; +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Deflater; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.util.TemporaryBuffer; + +final class DeltaWindow { + private static final boolean NEXT_RES = false; + private static final boolean NEXT_SRC = true; + + private final PackConfig config; + private final DeltaCache deltaCache; + private final ObjectReader reader; + private final ProgressMonitor monitor; + private final long bytesPerUnit; + private long bytesProcessed; + + /** Maximum number of bytes to admit to the window at once. */ + private final long maxMemory; + + /** Maximum depth we should create for any delta chain. */ + private final int maxDepth; + + private final ObjectToPack[] toSearch; + private int cur; + private int end; + + /** Amount of memory we have loaded right now. */ + private long loaded; + + // The object we are currently considering needs a lot of state: + + /** Window entry of the object we are currently considering. */ + private DeltaWindowEntry res; + + /** If we have chosen a base, the window entry it was created from. */ + private DeltaWindowEntry bestBase; + private int deltaLen; + private Object deltaBuf; + + /** Used to compress cached deltas. */ + private Deflater deflater; + + DeltaWindow(PackConfig pc, DeltaCache dc, ObjectReader or, + ProgressMonitor pm, long bpu, + ObjectToPack[] in, int beginIndex, int endIndex) { + config = pc; + deltaCache = dc; + reader = or; + monitor = pm; + bytesPerUnit = bpu; + toSearch = in; + cur = beginIndex; + end = endIndex; + + maxMemory = Math.max(0, config.getDeltaSearchMemoryLimit()); + maxDepth = config.getMaxDeltaDepth(); + res = DeltaWindowEntry.createWindow(config.getDeltaSearchWindowSize()); + } + + synchronized DeltaTask.Slice remaining() { + int e = end; + int halfRemaining = (e - cur) >>> 1; + if (0 == halfRemaining) + return null; + + int split = e - halfRemaining; + int h = toSearch[split].getPathHash(); + + // Attempt to split on the next path after the 50% split point. + for (int n = split + 1; n < e; n++) { + if (h != toSearch[n].getPathHash()) + return new DeltaTask.Slice(n, e); + } + + if (h != toSearch[cur].getPathHash()) { + // Try to split on the path before the 50% split point. + // Do not split the path currently being processed. 
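+ // For example, with cur=0, end=8 and path hashes A,A,A,B,B,B,B,B the
+ // 50% point (split=4) falls inside the run of B's; the forward scan
+ // above finds no later boundary, so the backward scan below returns
+ // the slice [3, 8) starting at the first B.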
+ for (int p = split - 1; cur < p; p--) { + if (h != toSearch[p].getPathHash()) + return new DeltaTask.Slice(p + 1, e); + } + } + return null; + } + + synchronized boolean tryStealWork(DeltaTask.Slice s) { + if (s.beginIndex <= cur || end <= s.beginIndex) + return false; + end = s.beginIndex; + return true; + } + + void search() throws IOException { + try { + for (;;) { + ObjectToPack next; + synchronized (this) { + if (end <= cur) + break; + next = toSearch[cur++]; + } + if (maxMemory != 0) { + clear(res); + final long need = estimateSize(next); + DeltaWindowEntry n = res.next; + for (; maxMemory < loaded + need && n != res; n = n.next) + clear(n); + } + res.set(next); + + if (res.object.isEdge() || res.object.doNotAttemptDelta()) { + // We don't actually want to make a delta for + // them, just need to push them into the window + // so they can be read by other objects. + keepInWindow(); + } else { + // Search for a delta for the current window slot. + if (bytesPerUnit <= (bytesProcessed += next.getWeight())) { + int d = (int) (bytesProcessed / bytesPerUnit); + monitor.update(d); + bytesProcessed -= d * bytesPerUnit; + } + searchInWindow(); + } + } + } finally { + if (deflater != null) + deflater.end(); + } + } + + private static long estimateSize(ObjectToPack ent) { + return DeltaIndex.estimateIndexSize(ent.getWeight()); + } + + private static long estimateIndexSize(DeltaWindowEntry ent) { + if (ent.buffer == null) + return estimateSize(ent.object); + + int len = ent.buffer.length; + return DeltaIndex.estimateIndexSize(len) - len; + } + + private void clear(DeltaWindowEntry ent) { + if (ent.index != null) + loaded -= ent.index.getIndexSize(); + else if (ent.buffer != null) + loaded -= ent.buffer.length; + ent.set(null); + } + + private void searchInWindow() throws IOException { + // Loop through the window backwards, considering every entry. + // This lets us look at the bigger objects that came before. + for (DeltaWindowEntry src = res.prev; src != res; src = src.prev) { + if (src.empty()) + break; + if (delta(src) /* == NEXT_SRC */) + continue; + bestBase = null; + deltaBuf = null; + return; + } + + // We couldn't find a suitable delta for this object, but it may + // still be able to act as a base for another one. + if (bestBase == null) { + keepInWindow(); + return; + } + + // Select this best matching delta as the base for the object. + // + ObjectToPack srcObj = bestBase.object; + ObjectToPack resObj = res.object; + if (srcObj.isEdge()) { + // The source (the delta base) is an edge object outside of the + // pack. Its part of the common base set that the peer already + // has on hand, so we don't want to send it. We have to store + // an ObjectId and *NOT* an ObjectToPack for the base to ensure + // the base isn't included in the outgoing pack file. + resObj.setDeltaBase(srcObj.copy()); + } else { + // The base is part of the pack we are sending, so it should be + // a direct pointer to the base. + resObj.setDeltaBase(srcObj); + } + + int depth = srcObj.getDeltaDepth() + 1; + resObj.setDeltaDepth(depth); + resObj.clearReuseAsIs(); + cacheDelta(srcObj, resObj); + + if (depth < maxDepth) { + // Reorder the window so that the best base will be tested + // first for the next object, and the current object will + // be the second candidate to consider before any others. 
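+ // makeNext() relinks the circular window so that bestBase immediately
+ // follows the old res; advancing res past it makes bestBase the first
+ // candidate (res.prev) scanned for the next object, and the old res
+ // the second.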
+ res.makeNext(bestBase); + res = bestBase.next; + } + + bestBase = null; + deltaBuf = null; + } + + private boolean delta(final DeltaWindowEntry src) + throws IOException { + // Objects must use only the same type as their delta base. + if (src.type() != res.type()) { + keepInWindow(); + return NEXT_RES; + } + + // If the sizes are radically different, this is a bad pairing. + if (res.size() < src.size() >>> 4) + return NEXT_SRC; + + int msz = deltaSizeLimit(src); + if (msz <= 8) // Nearly impossible to fit useful delta. + return NEXT_SRC; + + // If we have to insert a lot to make this work, find another. + if (res.size() - src.size() > msz) + return NEXT_SRC; + + DeltaIndex srcIndex; + try { + srcIndex = index(src); + } catch (LargeObjectException tooBig) { + // If the source is too big to work on, skip it. + return NEXT_SRC; + } catch (IOException notAvailable) { + if (src.object.isEdge()) // Missing edges are OK. + return NEXT_SRC; + throw notAvailable; + } + + byte[] resBuf; + try { + resBuf = buffer(res); + } catch (LargeObjectException tooBig) { + // If its too big, move on to another item. + return NEXT_RES; + } + + try { + OutputStream delta = msz <= (8 << 10) + ? new ArrayStream(msz) + : new TemporaryBuffer.Heap(msz); + if (srcIndex.encode(delta, resBuf, msz)) + selectDeltaBase(src, delta); + } catch (IOException deltaTooBig) { + // Unlikely, encoder should see limit and return false. + } + return NEXT_SRC; + } + + private void selectDeltaBase(DeltaWindowEntry src, OutputStream delta) { + bestBase = src; + + if (delta instanceof ArrayStream) { + ArrayStream a = (ArrayStream) delta; + deltaBuf = a.buf; + deltaLen = a.cnt; + } else { + TemporaryBuffer.Heap b = (TemporaryBuffer.Heap) delta; + deltaBuf = b; + deltaLen = (int) b.length(); + } + } + + private int deltaSizeLimit(DeltaWindowEntry src) { + if (bestBase == null) { + // Any delta should be no more than 50% of the original size + // (for text files deflate of whole form should shrink 50%). + int n = res.size() >>> 1; + + // Evenly distribute delta size limits over allowed depth. + // If src is non-delta (depth = 0), delta <= 50% of original. + // If src is almost at limit (9/10), delta <= 10% of original. + return n * (maxDepth - src.depth()) / maxDepth; + } + + // With a delta base chosen any new delta must be "better". + // Retain the distribution described above. + int d = bestBase.depth(); + int n = deltaLen; + + // If src is whole (depth=0) and base is near limit (depth=9/10) + // any delta using src can be 10x larger and still be better. + // + // If src is near limit (depth=9/10) and base is whole (depth=0) + // a new delta dependent on src must be 1/10th the size. 
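+ // e.g. with maxDepth=10: src.depth()=0 against d=9 allows 10 * n,
+ // while src.depth()=9 against d=0 allows only n / 10, matching the
+ // 10x / one-tenth bounds described above (integer math, rounded down).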
+ return n * (maxDepth - src.depth()) / (maxDepth - d); + } + + private void cacheDelta(ObjectToPack srcObj, ObjectToPack resObj) { + if (deltaCache.canCache(deltaLen, srcObj, resObj)) { + try { + byte[] zbuf = new byte[deflateBound(deltaLen)]; + ZipStream zs = new ZipStream(deflater(), zbuf); + if (deltaBuf instanceof byte[]) + zs.write((byte[]) deltaBuf, 0, deltaLen); + else + ((TemporaryBuffer.Heap) deltaBuf).writeTo(zs, null); + deltaBuf = null; + int len = zs.finish(); + + resObj.setCachedDelta(deltaCache.cache(zbuf, len, deltaLen)); + resObj.setCachedSize(deltaLen); + } catch (IOException err) { + deltaCache.credit(deltaLen); + } catch (OutOfMemoryError err) { + deltaCache.credit(deltaLen); + } + } + } + + private static int deflateBound(int insz) { + return insz + ((insz + 7) >> 3) + ((insz + 63) >> 6) + 11; + } + + private void keepInWindow() { + res = res.next; + } + + private DeltaIndex index(DeltaWindowEntry ent) + throws MissingObjectException, IncorrectObjectTypeException, + IOException, LargeObjectException { + DeltaIndex idx = ent.index; + if (idx == null) { + checkLoadable(ent, estimateIndexSize(ent)); + + try { + idx = new DeltaIndex(buffer(ent)); + } catch (OutOfMemoryError noMemory) { + LargeObjectException.OutOfMemory e; + e = new LargeObjectException.OutOfMemory(noMemory); + e.setObjectId(ent.object); + throw e; + } + if (maxMemory != 0) + loaded += idx.getIndexSize() - idx.getSourceSize(); + ent.index = idx; + } + return idx; + } + + private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException, + IncorrectObjectTypeException, IOException, LargeObjectException { + byte[] buf = ent.buffer; + if (buf == null) { + checkLoadable(ent, ent.size()); + + buf = PackWriter.buffer(config, reader, ent.object); + if (maxMemory != 0) + loaded += buf.length; + ent.buffer = buf; + } + return buf; + } + + private void checkLoadable(DeltaWindowEntry ent, long need) { + if (maxMemory == 0) + return; + + DeltaWindowEntry n = res.next; + for (; maxMemory < loaded + need; n = n.next) { + clear(n); + if (n == ent) + throw new LargeObjectException.ExceedsLimit( + maxMemory, loaded + need); + } + } + + private Deflater deflater() { + if (deflater == null) + deflater = new Deflater(config.getCompressionLevel()); + else + deflater.reset(); + return deflater; + } + + static final class ZipStream extends OutputStream { + private final Deflater deflater; + + private final byte[] zbuf; + + private int outPtr; + + ZipStream(Deflater deflater, byte[] zbuf) { + this.deflater = deflater; + this.zbuf = zbuf; + } + + int finish() throws IOException { + deflater.finish(); + for (;;) { + if (outPtr == zbuf.length) + throw new EOFException(); + + int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr); + if (n == 0) { + if (deflater.finished()) + return outPtr; + throw new IOException(); + } + outPtr += n; + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + deflater.setInput(b, off, len); + for (;;) { + if (outPtr == zbuf.length) + throw new EOFException(); + + int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr); + if (n == 0) { + if (deflater.needsInput()) + break; + throw new IOException(); + } + outPtr += n; + } + } + + @Override + public void write(int b) throws IOException { + throw new UnsupportedOperationException(); + } + } + + static final class ArrayStream extends OutputStream { + final byte[] buf; + int cnt; + + ArrayStream(int max) { + buf = new byte[max]; + } + + @Override + public void write(int b) throws IOException { + if 
(cnt == buf.length) + throw new IOException(); + buf[cnt++] = (byte) b; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (len > buf.length - cnt) + throw new IOException(); + System.arraycopy(b, off, buf, cnt, len); + cnt += len; + } + } +} diff --git a/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java new file mode 100644 index 000000000..a7122592f --- /dev/null +++ b/src/test/resources/oracle/commits/jgit-5d8a9f6f3f43ac43c6b1c48cdfad55e545171ea3/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -0,0 +1,2475 @@ +/* + * Copyright (C) 2008-2010, Google Inc. + * Copyright (C) 2008, Marek Zawirski + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.internal.storage.pack; + +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_WHOLE; +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TAG; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; + +import java.io.IOException; +import java.io.OutputStream; +import java.lang.ref.WeakReference; +import java.security.MessageDigest; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.zip.CRC32; +import java.util.zip.CheckedOutputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; + +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder; +import org.eclipse.jgit.internal.storage.file.PackBitmapIndexWriterV1; +import org.eclipse.jgit.internal.storage.file.PackIndexWriter; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.AsyncObjectSizeQueue; +import org.eclipse.jgit.lib.BatchingProgressMonitor; +import org.eclipse.jgit.lib.BitmapIndex; +import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; +import org.eclipse.jgit.lib.BitmapObject; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; +import org.eclipse.jgit.revwalk.AsyncRevObjectQueue; +import org.eclipse.jgit.revwalk.DepthWalk; +import org.eclipse.jgit.revwalk.ObjectWalk; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevFlag; +import org.eclipse.jgit.revwalk.RevObject; +import org.eclipse.jgit.revwalk.RevSort; +import org.eclipse.jgit.revwalk.RevTag; +import org.eclipse.jgit.revwalk.RevTree; +import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.util.BlockList; +import org.eclipse.jgit.util.TemporaryBuffer; + +/** + *
<p>
+ * PackWriter class is responsible for generating pack files from specified set
+ * of objects from repository. This implementation produce pack files in format
+ * version 2.
+ * </p>
+ * <p>
+ * Source of objects may be specified in two ways:
+ * <ul>
+ * <li>(usually) by providing sets of interesting and uninteresting objects in
+ * repository - all interesting objects and their ancestors except uninteresting
+ * objects and their ancestors will be included in pack, or</li>
+ * <li>by providing iterator of {@link RevObject} specifying exact list and
+ * order of objects in pack</li>
+ * </ul>
+ * <p>
+ * Typical usage consists of creating instance intended for some pack,
+ * configuring options, preparing the list of objects by calling
+ * {@link #preparePack(Iterator)} or
+ * {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally
+ * producing the stream with {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
+ * </p>
+ * <p>
+ * Class provide set of configurable options and {@link ProgressMonitor}
+ * support, as operations may take a long time for big repositories. Deltas
+ * searching algorithm is <b>NOT IMPLEMENTED</b> yet - this implementation
+ * relies only on deltas and objects reuse.
+ * </p>
+ * <p>
+ * This class is not thread safe, it is intended to be used in one thread, with
+ * one instance per created pack. Subsequent calls to writePack result in
+ * undefined behavior.
+ * </p>
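+ * <p>
+ * A minimal usage sketch of the sequence described above (illustrative
+ * only; the variable names are placeholders, error handling omitted):
+ * </p>
+ * <pre>
+ * PackWriter pw = new PackWriter(repo);
+ * try {
+ *   pw.preparePack(countingMonitor, wantIds, haveIds);
+ *   pw.writePack(compressMonitor, writeMonitor, packOut);
+ * } finally {
+ *   pw.release();
+ * }
+ * </pre>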
+ */ +public class PackWriter { + private static final int PACK_VERSION_GENERATED = 2; + + /** A collection of object ids. */ + public interface ObjectIdSet { + /** + * Returns true if the objectId is contained within the collection. + * + * @param objectId + * the objectId to find + * @return whether the collection contains the objectId. + */ + boolean contains(AnyObjectId objectId); + } + + private static final Map, Boolean> instances = + new ConcurrentHashMap, Boolean>(); + + private static final Iterable instancesIterable = new Iterable() { + public Iterator iterator() { + return new Iterator() { + private final Iterator> it = + instances.keySet().iterator(); + private PackWriter next; + + public boolean hasNext() { + if (next != null) + return true; + while (it.hasNext()) { + WeakReference ref = it.next(); + next = ref.get(); + if (next != null) + return true; + it.remove(); + } + return false; + } + + public PackWriter next() { + if (hasNext()) { + PackWriter result = next; + next = null; + return result; + } + throw new NoSuchElementException(); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + + /** @return all allocated, non-released PackWriters instances. */ + public static Iterable getInstances() { + return instancesIterable; + } + + @SuppressWarnings("unchecked") + private final BlockList objectsLists[] = new BlockList[OBJ_TAG + 1]; + { + objectsLists[OBJ_COMMIT] = new BlockList(); + objectsLists[OBJ_TREE] = new BlockList(); + objectsLists[OBJ_BLOB] = new BlockList(); + objectsLists[OBJ_TAG] = new BlockList(); + } + + private final ObjectIdOwnerMap objectsMap = new ObjectIdOwnerMap(); + + // edge objects for thin packs + private List edgeObjects = new BlockList(); + + // Objects the client is known to have already. + private BitmapBuilder haveObjects; + + private List cachedPacks = new ArrayList(2); + + private Set tagTargets = Collections.emptySet(); + + private ObjectIdSet[] excludeInPacks; + + private ObjectIdSet excludeInPackLast; + + private Deflater myDeflater; + + private final ObjectReader reader; + + /** {@link #reader} recast to the reuse interface, if it supports it. */ + private final ObjectReuseAsIs reuseSupport; + + private final PackConfig config; + + private final Statistics stats; + + private final MutableState state; + + private final WeakReference selfRef; + + private Statistics.ObjectType typeStats; + + private List sortedByName; + + private byte packcsum[]; + + private boolean deltaBaseAsOffset; + + private boolean reuseDeltas; + + private boolean reuseDeltaCommits; + + private boolean reuseValidate; + + private boolean thin; + + private boolean useCachedPacks; + + private boolean useBitmaps; + + private boolean ignoreMissingUninteresting = true; + + private boolean pruneCurrentObjectList; + + private boolean shallowPack; + + private boolean canBuildBitmaps; + + private boolean indexDisabled; + + private int depth; + + private Collection unshallowObjects; + + private PackBitmapIndexBuilder writeBitmaps; + + private CRC32 crc32; + + /** + * Create writer for specified repository. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param repo + * repository where objects are stored. + */ + public PackWriter(final Repository repo) { + this(repo, repo.newObjectReader()); + } + + /** + * Create a writer to load objects from the specified reader. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final ObjectReader reader) { + this(new PackConfig(), reader); + } + + /** + * Create writer for specified repository. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param repo + * repository where objects are stored. + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final Repository repo, final ObjectReader reader) { + this(new PackConfig(repo), reader); + } + + /** + * Create writer with a specified configuration. + *
<p>
+ * Objects for packing are specified in {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Collection, Collection)}. + * + * @param config + * configuration for the pack writer. + * @param reader + * reader to read from the repository with. + */ + public PackWriter(final PackConfig config, final ObjectReader reader) { + this.config = config; + this.reader = reader; + if (reader instanceof ObjectReuseAsIs) + reuseSupport = ((ObjectReuseAsIs) reader); + else + reuseSupport = null; + + deltaBaseAsOffset = config.isDeltaBaseAsOffset(); + reuseDeltas = config.isReuseDeltas(); + reuseValidate = true; // be paranoid by default + stats = new Statistics(); + state = new MutableState(); + selfRef = new WeakReference(this); + instances.put(selfRef, Boolean.TRUE); + } + + /** + * Check whether writer can store delta base as an offset (new style + * reducing pack size) or should store it as an object id (legacy style, + * compatible with old readers). + * + * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET} + * + * @return true if delta base is stored as an offset; false if it is stored + * as an object id. + */ + public boolean isDeltaBaseAsOffset() { + return deltaBaseAsOffset; + } + + /** + * Set writer delta base format. Delta base can be written as an offset in a + * pack file (new approach reducing file size) or as an object id (legacy + * approach, compatible with old readers). + * + * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET} + * + * @param deltaBaseAsOffset + * boolean indicating whether delta base can be stored as an + * offset. + */ + public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) { + this.deltaBaseAsOffset = deltaBaseAsOffset; + } + + /** + * Check if the writer will reuse commits that are already stored as deltas. + * + * @return true if the writer would reuse commits stored as deltas, assuming + * delta reuse is already enabled. + */ + public boolean isReuseDeltaCommits() { + return reuseDeltaCommits; + } + + /** + * Set the writer to reuse existing delta versions of commits. + * + * @param reuse + * if true, the writer will reuse any commits stored as deltas. + * By default the writer does not reuse delta commits. + */ + public void setReuseDeltaCommits(boolean reuse) { + reuseDeltaCommits = reuse; + } + + /** + * Check if the writer validates objects before copying them. + * + * @return true if validation is enabled; false if the reader will handle + * object validation as a side-effect of it consuming the output. + */ + public boolean isReuseValidatingObjects() { + return reuseValidate; + } + + /** + * Enable (or disable) object validation during packing. + * + * @param validate + * if true the pack writer will validate an object before it is + * put into the output. This additional validation work may be + * necessary to avoid propagating corruption from one local pack + * file to another local pack file. + */ + public void setReuseValidatingObjects(boolean validate) { + reuseValidate = validate; + } + + /** @return true if this writer is producing a thin pack. */ + public boolean isThin() { + return thin; + } + + /** + * @param packthin + * a boolean indicating whether writer may pack objects with + * delta base object not within set of objects to pack, but + * belonging to party repository (uninteresting/boundary) as + * determined by set; this kind of pack is used only for + * transport; true - to produce thin pack, false - otherwise. 
+ */ + public void setThin(final boolean packthin) { + thin = packthin; + } + + /** @return true to reuse cached packs. If true index creation isn't available. */ + public boolean isUseCachedPacks() { + return useCachedPacks; + } + + /** + * @param useCached + * if set to true and a cached pack is present, it will be + * appended onto the end of a thin-pack, reducing the amount of + * working set space and CPU used by PackWriter. Enabling this + * feature prevents PackWriter from creating an index for the + * newly created pack, so its only suitable for writing to a + * network client, where the client will make the index. + */ + public void setUseCachedPacks(boolean useCached) { + useCachedPacks = useCached; + } + + /** @return true to use bitmaps for ObjectWalks, if available. */ + public boolean isUseBitmaps() { + return useBitmaps; + } + + /** + * @param useBitmaps + * if set to true, bitmaps will be used when preparing a pack. + */ + public void setUseBitmaps(boolean useBitmaps) { + this.useBitmaps = useBitmaps; + } + + /** @return true if the index file cannot be created by this PackWriter. */ + public boolean isIndexDisabled() { + return indexDisabled || !cachedPacks.isEmpty(); + } + + /** + * @param noIndex + * true to disable creation of the index file. + */ + public void setIndexDisabled(boolean noIndex) { + this.indexDisabled = noIndex; + } + + /** + * @return true to ignore objects that are uninteresting and also not found + * on local disk; false to throw a {@link MissingObjectException} + * out of {@link #preparePack(ProgressMonitor, Collection, Collection)} if an + * uninteresting object is not in the source repository. By default, + * true, permitting gracefully ignoring of uninteresting objects. + */ + public boolean isIgnoreMissingUninteresting() { + return ignoreMissingUninteresting; + } + + /** + * @param ignore + * true if writer should ignore non existing uninteresting + * objects during construction set of objects to pack; false + * otherwise - non existing uninteresting objects may cause + * {@link MissingObjectException} + */ + public void setIgnoreMissingUninteresting(final boolean ignore) { + ignoreMissingUninteresting = ignore; + } + + /** + * Set the tag targets that should be hoisted earlier during packing. + *
<p>
+ * Callers may put objects into this set before invoking any of the + * preparePack methods to influence where an annotated tag's target is + * stored within the resulting pack. Typically these will be clustered + * together, and hoisted earlier in the file even if they are ancient + * revisions, allowing readers to find tag targets with better locality. + * + * @param objects + * objects that annotated tags point at. + */ + public void setTagTargets(Set objects) { + tagTargets = objects; + } + + /** + * Configure this pack for a shallow clone. + * + * @param depth + * maximum depth to traverse the commit graph + * @param unshallow + * objects which used to be shallow on the client, but are being + * extended as part of this fetch + */ + public void setShallowPack(int depth, + Collection unshallow) { + this.shallowPack = true; + this.depth = depth; + this.unshallowObjects = unshallow; + } + + /** + * Returns objects number in a pack file that was created by this writer. + * + * @return number of objects in pack. + * @throws IOException + * a cached pack cannot supply its object count. + */ + public long getObjectCount() throws IOException { + if (stats.totalObjects == 0) { + long objCnt = 0; + + objCnt += objectsLists[OBJ_COMMIT].size(); + objCnt += objectsLists[OBJ_TREE].size(); + objCnt += objectsLists[OBJ_BLOB].size(); + objCnt += objectsLists[OBJ_TAG].size(); + + for (CachedPack pack : cachedPacks) + objCnt += pack.getObjectCount(); + return objCnt; + } + return stats.totalObjects; + } + + /** + * Returns the object ids in the pack file that was created by this writer. + * + * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. + * + * @return number of objects in pack. + * @throws IOException + * a cached pack cannot supply its object ids. + */ + public ObjectIdOwnerMap getObjectSet() + throws IOException { + if (!cachedPacks.isEmpty()) + throw new IOException( + JGitText.get().cachedPacksPreventsListingObjects); + + ObjectIdOwnerMap objs = new ObjectIdOwnerMap< + ObjectIdOwnerMap.Entry>(); + for (BlockList objList : objectsLists) { + if (objList != null) { + for (ObjectToPack otp : objList) + objs.add(new ObjectIdOwnerMap.Entry(otp) { + // A new entry that copies the ObjectId + }); + } + } + return objs; + } + + /** + * Add a pack index whose contents should be excluded from the result. + * + * @param idx + * objects in this index will not be in the output pack. + */ + public void excludeObjects(ObjectIdSet idx) { + if (excludeInPacks == null) { + excludeInPacks = new ObjectIdSet[] { idx }; + excludeInPackLast = idx; + } else { + int cnt = excludeInPacks.length; + ObjectIdSet[] newList = new ObjectIdSet[cnt + 1]; + System.arraycopy(excludeInPacks, 0, newList, 0, cnt); + newList[cnt] = idx; + excludeInPacks = newList; + } + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Iterator exactly determines which objects are included in a pack + * and order they appear in pack (except that objects order by type is not + * needed at input). This order should conform general rules of ordering + * objects in git - by recency and path (type and delta-base first is + * internally secured) and responsibility for guaranteeing this order is on + * a caller side. Iterator must return each id of object to write exactly + * once. + *
</p>
+ * + * @param objectsSource + * iterator of object to store in a pack; order of objects within + * each type is important, ordering by type is not needed; + * allowed types for objects are {@link Constants#OBJ_COMMIT}, + * {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and + * {@link Constants#OBJ_TAG}; objects returned by iterator may be + * later reused by caller as object id and type are internally + * copied in each iteration. + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(final Iterator objectsSource) + throws IOException { + while (objectsSource.hasNext()) { + addObject(objectsSource.next()); + } + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param want + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param have + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + * @deprecated to be removed in 2.0; use the Set version of this method. + */ + @Deprecated + public void preparePack(ProgressMonitor countingMonitor, + final Collection want, + final Collection have) throws IOException { + preparePack(countingMonitor, ensureSet(want), ensureSet(have)); + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param walk + * ObjectWalk to perform enumeration. + * @param interestingObjects + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param uninterestingObjects + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + * @deprecated to be removed in 2.0; use the Set version of this method. + */ + @Deprecated + public void preparePack(ProgressMonitor countingMonitor, + ObjectWalk walk, + final Collection interestingObjects, + final Collection uninterestingObjects) + throws IOException { + preparePack(countingMonitor, walk, + ensureSet(interestingObjects), + ensureSet(uninterestingObjects)); + } + + @SuppressWarnings("unchecked") + private static Set ensureSet(Collection objs) { + Set set; + if (objs instanceof Set) + set = (Set) objs; + else if (objs == null) + set = Collections.emptySet(); + else + set = new HashSet(objs); + return set; + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param want + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param have + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(ProgressMonitor countingMonitor, + Set want, + Set have) throws IOException { + ObjectWalk ow; + if (shallowPack) + ow = new DepthWalk.ObjectWalk(reader, depth); + else + ow = new ObjectWalk(reader); + preparePack(countingMonitor, ow, want, have); + } + + /** + * Prepare the list of objects to be written to the pack stream. + *
<p>
+ * Basing on these 2 sets, another set of objects to put in a pack file is + * created: this set consists of all objects reachable (ancestors) from + * interesting objects, except uninteresting objects and their ancestors. + * This method uses class {@link ObjectWalk} extensively to find out that + * appropriate set of output objects and their optimal order in output pack. + * Order is consistent with general git in-pack rules: sort by object type, + * recency, path and delta-base first. + *
</p>
+ * + * @param countingMonitor + * progress during object enumeration. + * @param walk + * ObjectWalk to perform enumeration. + * @param interestingObjects + * collection of objects to be marked as interesting (start + * points of graph traversal). + * @param uninterestingObjects + * collection of objects to be marked as uninteresting (end + * points of graph traversal). + * @throws IOException + * when some I/O problem occur during reading objects. + */ + public void preparePack(ProgressMonitor countingMonitor, + ObjectWalk walk, + final Set interestingObjects, + final Set uninterestingObjects) + throws IOException { + if (countingMonitor == null) + countingMonitor = NullProgressMonitor.INSTANCE; + if (shallowPack && !(walk instanceof DepthWalk.ObjectWalk)) + walk = new DepthWalk.ObjectWalk(reader, depth); + findObjectsToPack(countingMonitor, walk, interestingObjects, + uninterestingObjects); + } + + /** + * Determine if the pack file will contain the requested object. + * + * @param id + * the object to test the existence of. + * @return true if the object will appear in the output pack file. + * @throws IOException + * a cached pack cannot be examined. + */ + public boolean willInclude(final AnyObjectId id) throws IOException { + ObjectToPack obj = objectsMap.get(id); + return obj != null && !obj.isEdge(); + } + + /** + * Lookup the ObjectToPack object for a given ObjectId. + * + * @param id + * the object to find in the pack. + * @return the object we are packing, or null. + */ + public ObjectToPack get(AnyObjectId id) { + ObjectToPack obj = objectsMap.get(id); + return obj != null && !obj.isEdge() ? obj : null; + } + + /** + * Computes SHA-1 of lexicographically sorted objects ids written in this + * pack, as used to name a pack file in repository. + * + * @return ObjectId representing SHA-1 name of a pack that was created. + */ + public ObjectId computeName() { + final byte[] buf = new byte[OBJECT_ID_LENGTH]; + final MessageDigest md = Constants.newMessageDigest(); + for (ObjectToPack otp : sortByName()) { + otp.copyRawTo(buf, 0); + md.update(buf, 0, OBJECT_ID_LENGTH); + } + return ObjectId.fromRaw(md.digest()); + } + + /** + * Returns the index format version that will be written. + *
<p>
+ * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. + * + * @return the index format version. + */ + public int getIndexVersion() { + int indexVersion = config.getIndexVersion(); + if (indexVersion <= 0) { + for (BlockList objs : objectsLists) + indexVersion = Math.max(indexVersion, + PackIndexWriter.oldestPossibleFormat(objs)); + } + return indexVersion; + } + + /** + * Create an index file to match the pack file just written. + *
<p>
+ * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. Writing a corresponding index is + * an optional feature that not all pack users may require. + * + * @param indexStream + * output for the index data. Caller is responsible for closing + * this stream. + * @throws IOException + * the index data could not be written to the supplied stream. + */ + public void writeIndex(final OutputStream indexStream) throws IOException { + if (isIndexDisabled()) + throw new IOException(JGitText.get().cachedPacksPreventsIndexCreation); + + long writeStart = System.currentTimeMillis(); + final PackIndexWriter iw = PackIndexWriter.createVersion( + indexStream, getIndexVersion()); + iw.write(sortByName(), packcsum); + stats.timeWriting += System.currentTimeMillis() - writeStart; + } + + /** + * Create a bitmap index file to match the pack file just written. + *
<p>
+ * This method can only be invoked after + * {@link #prepareBitmapIndex(ProgressMonitor)} has been invoked and + * completed successfully. Writing a corresponding bitmap index is an + * optional feature that not all pack users may require. + * + * @param bitmapIndexStream + * output for the bitmap index data. Caller is responsible for + * closing this stream. + * @throws IOException + * the index data could not be written to the supplied stream. + */ + public void writeBitmapIndex(final OutputStream bitmapIndexStream) + throws IOException { + if (writeBitmaps == null) + throw new IOException(JGitText.get().bitmapsMustBePrepared); + + long writeStart = System.currentTimeMillis(); + final PackBitmapIndexWriterV1 iw = new PackBitmapIndexWriterV1(bitmapIndexStream); + iw.write(writeBitmaps, packcsum); + stats.timeWriting += System.currentTimeMillis() - writeStart; + } + + private List sortByName() { + if (sortedByName == null) { + int cnt = 0; + cnt += objectsLists[OBJ_COMMIT].size(); + cnt += objectsLists[OBJ_TREE].size(); + cnt += objectsLists[OBJ_BLOB].size(); + cnt += objectsLists[OBJ_TAG].size(); + + sortedByName = new BlockList(cnt); + sortedByName.addAll(objectsLists[OBJ_COMMIT]); + sortedByName.addAll(objectsLists[OBJ_TREE]); + sortedByName.addAll(objectsLists[OBJ_BLOB]); + sortedByName.addAll(objectsLists[OBJ_TAG]); + Collections.sort(sortedByName); + } + return sortedByName; + } + + private void beginPhase(PackingPhase phase, ProgressMonitor monitor, + long cnt) { + state.phase = phase; + String task; + switch (phase) { + case COUNTING: + task = JGitText.get().countingObjects; + break; + case GETTING_SIZES: + task = JGitText.get().searchForSizes; + break; + case FINDING_SOURCES: + task = JGitText.get().searchForReuse; + break; + case COMPRESSING: + task = JGitText.get().compressingObjects; + break; + case WRITING: + task = JGitText.get().writingObjects; + break; + case BUILDING_BITMAPS: + task = JGitText.get().buildingBitmaps; + break; + default: + throw new IllegalArgumentException( + MessageFormat.format(JGitText.get().illegalPackingPhase, phase)); + } + monitor.beginTask(task, (int) cnt); + } + + private void endPhase(ProgressMonitor monitor) { + monitor.endTask(); + } + + /** + * Write the prepared pack to the supplied stream. + *
<p>
+ * At first, this method collects and sorts objects to pack, then deltas + * search is performed if set up accordingly, finally pack stream is + * written. + *
</p>
+ *
<p>
+ * All reused objects data checksum (Adler32/CRC32) is computed and + * validated against existing checksum. + *
</p>
+ * + * @param compressMonitor + * progress monitor to report object compression work. + * @param writeMonitor + * progress monitor to report the number of objects written. + * @param packStream + * output stream of pack data. The stream should be buffered by + * the caller. The caller is responsible for closing the stream. + * @throws IOException + * an error occurred reading a local object's data to include in + * the pack, or writing compressed object data to the output + * stream. + */ + public void writePack(ProgressMonitor compressMonitor, + ProgressMonitor writeMonitor, OutputStream packStream) + throws IOException { + if (compressMonitor == null) + compressMonitor = NullProgressMonitor.INSTANCE; + if (writeMonitor == null) + writeMonitor = NullProgressMonitor.INSTANCE; + + excludeInPacks = null; + excludeInPackLast = null; + + boolean needSearchForReuse = reuseSupport != null && ( + reuseDeltas + || config.isReuseObjects() + || !cachedPacks.isEmpty()); + + if (compressMonitor instanceof BatchingProgressMonitor) { + long delay = 1000; + if (needSearchForReuse && config.isDeltaCompress()) + delay = 500; + ((BatchingProgressMonitor) compressMonitor).setDelayStart( + delay, + TimeUnit.MILLISECONDS); + } + + if (needSearchForReuse) + searchForReuse(compressMonitor); + if (config.isDeltaCompress()) + searchForDeltas(compressMonitor); + + crc32 = new CRC32(); + final PackOutputStream out = new PackOutputStream( + writeMonitor, + isIndexDisabled() + ? packStream + : new CheckedOutputStream(packStream, crc32), + this); + + long objCnt = getObjectCount(); + stats.totalObjects = objCnt; + beginPhase(PackingPhase.WRITING, writeMonitor, objCnt); + long writeStart = System.currentTimeMillis(); + + out.writeFileHeader(PACK_VERSION_GENERATED, objCnt); + out.flush(); + + writeObjects(out); + if (!edgeObjects.isEmpty() || !cachedPacks.isEmpty()) { + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + stats.thinPackBytes += typeStat.bytes; + } + } + + for (CachedPack pack : cachedPacks) { + long deltaCnt = pack.getDeltaCount(); + stats.reusedObjects += pack.getObjectCount(); + stats.reusedDeltas += deltaCnt; + stats.totalDeltas += deltaCnt; + reuseSupport.copyPackAsIs(out, pack, reuseValidate); + } + writeChecksum(out); + out.flush(); + stats.timeWriting = System.currentTimeMillis() - writeStart; + stats.totalBytes = out.length(); + stats.reusedPacks = Collections.unmodifiableList(cachedPacks); + stats.depth = depth; + + for (Statistics.ObjectType typeStat : stats.objectTypes) { + if (typeStat == null) + continue; + typeStat.cntDeltas += typeStat.reusedDeltas; + + stats.reusedObjects += typeStat.reusedObjects; + stats.reusedDeltas += typeStat.reusedDeltas; + stats.totalDeltas += typeStat.cntDeltas; + } + + reader.release(); + endPhase(writeMonitor); + } + + /** + * @return description of what this PackWriter did in order to create the + * final pack stream. The object is only available to callers after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} + */ + public Statistics getStatistics() { + return stats; + } + + /** @return snapshot of the current state of this PackWriter. */ + public State getState() { + return state.snapshot(); + } + + /** Release all resources used by this writer. 
*/ + public void release() { + reader.release(); + if (myDeflater != null) { + myDeflater.end(); + myDeflater = null; + } + instances.remove(selfRef); + } + + private void searchForReuse(ProgressMonitor monitor) throws IOException { + long cnt = 0; + cnt += objectsLists[OBJ_COMMIT].size(); + cnt += objectsLists[OBJ_TREE].size(); + cnt += objectsLists[OBJ_BLOB].size(); + cnt += objectsLists[OBJ_TAG].size(); + + long start = System.currentTimeMillis(); + beginPhase(PackingPhase.FINDING_SOURCES, monitor, cnt); + if (cnt <= 4096) { + // For small object counts, do everything as one list. + BlockList tmp = new BlockList((int) cnt); + tmp.addAll(objectsLists[OBJ_TAG]); + tmp.addAll(objectsLists[OBJ_COMMIT]); + tmp.addAll(objectsLists[OBJ_TREE]); + tmp.addAll(objectsLists[OBJ_BLOB]); + searchForReuse(monitor, tmp); + if (pruneCurrentObjectList) { + // If the list was pruned, we need to re-prune the main lists. + pruneEdgesFromObjectList(objectsLists[OBJ_COMMIT]); + pruneEdgesFromObjectList(objectsLists[OBJ_TREE]); + pruneEdgesFromObjectList(objectsLists[OBJ_BLOB]); + pruneEdgesFromObjectList(objectsLists[OBJ_TAG]); + } + } else { + searchForReuse(monitor, objectsLists[OBJ_TAG]); + searchForReuse(monitor, objectsLists[OBJ_COMMIT]); + searchForReuse(monitor, objectsLists[OBJ_TREE]); + searchForReuse(monitor, objectsLists[OBJ_BLOB]); + } + endPhase(monitor); + stats.timeSearchingForReuse = System.currentTimeMillis() - start; + + if (config.isReuseDeltas() && config.getCutDeltaChains()) { + cutDeltaChains(objectsLists[OBJ_TREE]); + cutDeltaChains(objectsLists[OBJ_BLOB]); + } + } + + private void searchForReuse(ProgressMonitor monitor, List list) + throws IOException, MissingObjectException { + pruneCurrentObjectList = false; + reuseSupport.selectObjectRepresentation(this, monitor, list); + if (pruneCurrentObjectList) + pruneEdgesFromObjectList(list); + } + + private void cutDeltaChains(BlockList list) + throws IOException { + int max = config.getMaxDeltaDepth(); + for (int idx = list.size() - 1; idx >= 0; idx--) { + int d = 0; + ObjectToPack b = list.get(idx).getDeltaBase(); + while (b != null) { + if (d < b.getChainLength()) + break; + b.setChainLength(++d); + if (d >= max && b.isDeltaRepresentation()) { + reselectNonDelta(b); + break; + } + b = b.getDeltaBase(); + } + } + if (config.isDeltaCompress()) { + for (ObjectToPack otp : list) + otp.clearChainLength(); + } + } + + private void searchForDeltas(ProgressMonitor monitor) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + // Commits and annotated tags tend to have too many differences to + // really benefit from delta compression. Consequently just don't + // bother examining those types here. + // + ObjectToPack[] list = new ObjectToPack[ + objectsLists[OBJ_TREE].size() + + objectsLists[OBJ_BLOB].size() + + edgeObjects.size()]; + int cnt = 0; + cnt = findObjectsNeedingDelta(list, cnt, OBJ_TREE); + cnt = findObjectsNeedingDelta(list, cnt, OBJ_BLOB); + if (cnt == 0) + return; + int nonEdgeCnt = cnt; + + // Queue up any edge objects that we might delta against. We won't + // be sending these as we assume the other side has them, but we need + // them in the search phase below. + // + for (ObjectToPack eo : edgeObjects) { + eo.setWeight(0); + list[cnt++] = eo; + } + + // Compute the sizes of the objects so we can do a proper sort. + // We let the reader skip missing objects if it chooses. For + // some readers this can be a huge win. 
We detect missing objects + // by having set the weights above to 0 and allowing the delta + // search code to discover the missing object and skip over it, or + // abort with an exception if we actually had to have it. + // + final long sizingStart = System.currentTimeMillis(); + beginPhase(PackingPhase.GETTING_SIZES, monitor, cnt); + AsyncObjectSizeQueue sizeQueue = reader.getObjectSize( + Arrays. asList(list).subList(0, cnt), false); + try { + final long limit = Math.min( + config.getBigFileThreshold(), + Integer.MAX_VALUE); + for (;;) { + try { + if (!sizeQueue.next()) + break; + } catch (MissingObjectException notFound) { + monitor.update(1); + if (ignoreMissingUninteresting) { + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp != null && otp.isEdge()) { + otp.setDoNotDelta(); + continue; + } + + otp = objectsMap.get(notFound.getObjectId()); + if (otp != null && otp.isEdge()) { + otp.setDoNotDelta(); + continue; + } + } + throw notFound; + } + + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp == null) + otp = objectsMap.get(sizeQueue.getObjectId()); + + long sz = sizeQueue.getSize(); + if (DeltaIndex.BLKSZ < sz && sz < limit) + otp.setWeight((int) sz); + else + otp.setDoNotDelta(); // too small, or too big + monitor.update(1); + } + } finally { + sizeQueue.release(); + } + endPhase(monitor); + stats.timeSearchingForSizes = System.currentTimeMillis() - sizingStart; + + // Sort the objects by path hash so like files are near each other, + // and then by size descending so that bigger files are first. This + // applies "Linus' Law" which states that newer files tend to be the + // bigger ones, because source files grow and hardly ever shrink. + // + Arrays.sort(list, 0, cnt, new Comparator() { + public int compare(ObjectToPack a, ObjectToPack b) { + int cmp = (a.isDoNotDelta() ? 1 : 0) + - (b.isDoNotDelta() ? 1 : 0); + if (cmp != 0) + return cmp; + + cmp = a.getType() - b.getType(); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1); + if (cmp != 0) + return cmp; + + cmp = (a.isEdge() ? 0 : 1) - (b.isEdge() ? 0 : 1); + if (cmp != 0) + return cmp; + + return b.getWeight() - a.getWeight(); + } + }); + + // Above we stored the objects we cannot delta onto the end. + // Remove them from the list so we don't waste time on them. 
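+ // After the sort above every doNotDelta object sits at the tail of
+ // the list, so trimming is just shrinking cnt past that tail.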
+ while (0 < cnt && list[cnt - 1].isDoNotDelta()) { + if (!list[cnt - 1].isEdge()) + nonEdgeCnt--; + cnt--; + } + if (cnt == 0) + return; + + final long searchStart = System.currentTimeMillis(); + searchForDeltas(monitor, list, cnt); + stats.deltaSearchNonEdgeObjects = nonEdgeCnt; + stats.timeCompressing = System.currentTimeMillis() - searchStart; + + for (int i = 0; i < cnt; i++) + if (!list[i].isEdge() && list[i].isDeltaRepresentation()) + stats.deltasFound++; + } + + private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) { + for (ObjectToPack otp : objectsLists[type]) { + if (otp.isDoNotDelta()) // delta is disabled for this path + continue; + if (otp.isDeltaRepresentation()) // already reusing a delta + continue; + otp.setWeight(0); + list[cnt++] = otp; + } + return cnt; + } + + private void reselectNonDelta(ObjectToPack otp) throws IOException { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + boolean old = reuseDeltas; + reuseDeltas = false; + reuseSupport.selectObjectRepresentation(this, + NullProgressMonitor.INSTANCE, + Collections.singleton(otp)); + reuseDeltas = old; + } + + private void searchForDeltas(final ProgressMonitor monitor, + final ObjectToPack[] list, final int cnt) + throws MissingObjectException, IncorrectObjectTypeException, + LargeObjectException, IOException { + int threads = config.getThreads(); + if (threads == 0) + threads = Runtime.getRuntime().availableProcessors(); + if (threads <= 1 || cnt <= config.getDeltaSearchWindowSize()) + singleThreadDeltaSearch(monitor, list, cnt); + else + parallelDeltaSearch(monitor, list, cnt, threads); + } + + private void singleThreadDeltaSearch(ProgressMonitor monitor, + ObjectToPack[] list, int cnt) throws IOException { + long totalWeight = 0; + for (int i = 0; i < cnt; i++) { + ObjectToPack o = list[i]; + if (!o.isEdge() && !o.doNotAttemptDelta()) + totalWeight += o.getWeight(); + } + + long bytesPerUnit = 1; + while (DeltaTask.MAX_METER <= (totalWeight / bytesPerUnit)) + bytesPerUnit <<= 10; + int cost = (int) (totalWeight / bytesPerUnit); + if (totalWeight % bytesPerUnit != 0) + cost++; + + beginPhase(PackingPhase.COMPRESSING, monitor, cost); + new DeltaWindow(config, new DeltaCache(config), reader, + monitor, bytesPerUnit, + list, 0, cnt).search(); + endPhase(monitor); + } + + private void parallelDeltaSearch(ProgressMonitor monitor, + ObjectToPack[] list, int cnt, int threads) throws IOException { + DeltaCache dc = new ThreadSafeDeltaCache(config); + ThreadSafeProgressMonitor pm = new ThreadSafeProgressMonitor(monitor); + DeltaTask.Block taskBlock = new DeltaTask.Block(threads, config, + reader, dc, pm, + list, 0, cnt); + taskBlock.partitionTasks(); + beginPhase(PackingPhase.COMPRESSING, monitor, taskBlock.cost()); + pm.startWorkers(taskBlock.tasks.size()); + + Executor executor = config.getExecutor(); + final List errors = + Collections.synchronizedList(new ArrayList(threads)); + if (executor instanceof ExecutorService) { + // Caller supplied us a service, use it directly. + runTasks((ExecutorService) executor, pm, taskBlock, errors); + } else if (executor == null) { + // Caller didn't give us a way to run the tasks, spawn up a + // temporary thread pool and make sure it tears down cleanly. 
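// Aside (illustrative sketch, not part of the patch): singleThreadDeltaSearch
// above scales its progress meter by dividing the total byte weight down by
// powers of 1024 until the unit count fits under DeltaTask.MAX_METER, then
// rounds the last partial unit up so no work goes unreported. The cap value
// below is assumed for illustration, not the real DeltaTask constant.
class MeterScaleSketch {
	static final long MAX_METER = 9 << 20; // assumed cap (~9.4 million units)

	static void scale(long totalWeight) {
		long bytesPerUnit = 1;
		while (MAX_METER <= totalWeight / bytesPerUnit)
			bytesPerUnit <<= 10; // bytes -> KiB -> MiB -> ...
		long cost = totalWeight / bytesPerUnit;
		if (totalWeight % bytesPerUnit != 0)
			cost++; // never under-report the final partial unit
		System.out.println(totalWeight + " bytes -> " + cost
				+ " units of " + bytesPerUnit + " bytes");
	}

	public static void main(String[] args) {
		scale(512);       // 512 bytes -> 512 units of 1 byte
		scale(40L << 30); // 40 GiB -> 40960 units of 1 MiB
	}
}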
+ ExecutorService pool = Executors.newFixedThreadPool(threads); + try { + runTasks(pool, pm, taskBlock, errors); + } finally { + pool.shutdown(); + for (;;) { + try { + if (pool.awaitTermination(60, TimeUnit.SECONDS)) + break; + } catch (InterruptedException e) { + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + } + } else { + // The caller gave us an executor, but it might not do + // asynchronous execution. Wrap everything and hope it + // can schedule these for us. + for (final DeltaTask task : taskBlock.tasks) { + executor.execute(new Runnable() { + public void run() { + try { + task.call(); + } catch (Throwable failure) { + errors.add(failure); + } + } + }); + } + try { + pm.waitForCompletion(); + } catch (InterruptedException ie) { + // We can't abort the other tasks as we have no handle. + // Cross our fingers and just break out anyway. + // + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + + // If any task threw an error, try to report it back as + // though we weren't using a threaded search algorithm. + // + if (!errors.isEmpty()) { + Throwable err = errors.get(0); + if (err instanceof Error) + throw (Error) err; + if (err instanceof RuntimeException) + throw (RuntimeException) err; + if (err instanceof IOException) + throw (IOException) err; + + IOException fail = new IOException(err.getMessage()); + fail.initCause(err); + throw fail; + } + endPhase(monitor); + } + + private static void runTasks(ExecutorService pool, + ThreadSafeProgressMonitor pm, + DeltaTask.Block tb, List<Throwable> errors) throws IOException { + List<Future<?>> futures = new ArrayList<Future<?>>(tb.tasks.size()); + for (DeltaTask task : tb.tasks) + futures.add(pool.submit(task)); + + try { + pm.waitForCompletion(); + for (Future<?> f : futures) { + try { + f.get(); + } catch (ExecutionException failed) { + errors.add(failed.getCause()); + } + } + } catch (InterruptedException ie) { + for (Future<?> f : futures) + f.cancel(true); + throw new IOException( + JGitText.get().packingCancelledDuringObjectsWriting); + } + } + + private void writeObjects(PackOutputStream out) throws IOException { + writeObjects(out, objectsLists[OBJ_COMMIT]); + writeObjects(out, objectsLists[OBJ_TAG]); + writeObjects(out, objectsLists[OBJ_TREE]); + writeObjects(out, objectsLists[OBJ_BLOB]); + } + + private void writeObjects(PackOutputStream out, List<ObjectToPack> list) + throws IOException { + if (list.isEmpty()) + return; + + typeStats = stats.objectTypes[list.get(0).getType()]; + long beginOffset = out.length(); + + if (reuseSupport != null) { + reuseSupport.writeObjects(out, list); + } else { + for (ObjectToPack otp : list) + out.writeObject(otp); + } + + typeStats.bytes += out.length() - beginOffset; + typeStats.cntObjects = list.size(); + } + + void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException { + if (!otp.isWritten()) + writeObjectImpl(out, otp); + } + + private void writeObjectImpl(PackOutputStream out, ObjectToPack otp) + throws IOException { + if (otp.wantWrite()) { + // A cycle exists in this delta chain. This should only occur if a + // selected object representation disappeared during writing + // (for example due to a concurrent repack) and a different base + // was chosen, forcing a cycle. Select something other than a + // delta, and write this object. + reselectNonDelta(otp); + } + otp.markWantWrite(); + + while (otp.isReuseAsIs()) { + writeBase(out, otp.getDeltaBase()); + if (otp.isWritten()) + return; // Delta chain cycle caused this to write already.
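// Aside (illustrative sketch, not part of the patch): a condensed, standalone
// restatement of the error funnel used by parallelDeltaSearch/runTasks above —
// run the tasks, collect each failure's cause, then rethrow the first one on
// the calling thread as if the search had run single-threaded. Names here are
// illustrative, not jgit API.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

final class ErrorFunnelSketch {
	static void runAll(List<Callable<Void>> tasks)
			throws IOException, InterruptedException {
		ExecutorService pool = Executors.newFixedThreadPool(2);
		List<Throwable> errors = new ArrayList<Throwable>();
		try {
			List<Future<Void>> futures = new ArrayList<Future<Void>>();
			for (Callable<Void> task : tasks)
				futures.add(pool.submit(task));
			for (Future<Void> f : futures) {
				try {
					f.get(); // surfaces the worker's exception, wrapped
				} catch (ExecutionException failed) {
					errors.add(failed.getCause());
				}
			}
		} finally {
			pool.shutdown();
		}
		if (errors.isEmpty())
			return;
		Throwable err = errors.get(0);
		if (err instanceof Error)
			throw (Error) err; // propagate JVM-level failures untouched
		if (err instanceof RuntimeException)
			throw (RuntimeException) err;
		if (err instanceof IOException)
			throw (IOException) err;
		IOException fail = new IOException(err.getMessage());
		fail.initCause(err); // keep the original stack for diagnosis
		throw fail;
	}
}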
+ + crc32.reset(); + otp.setOffset(out.length()); + try { + reuseSupport.copyObjectAsIs(out, otp, reuseValidate); + out.endObject(); + otp.setCRC((int) crc32.getValue()); + typeStats.reusedObjects++; + if (otp.isDeltaRepresentation()) { + typeStats.reusedDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); + } + return; + } catch (StoredObjectRepresentationNotAvailableException gone) { + if (otp.getOffset() == out.length()) { + otp.setOffset(0); + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + reuseSupport.selectObjectRepresentation(this, + NullProgressMonitor.INSTANCE, + Collections.singleton(otp)); + continue; + } else { + // Object writing already started, we cannot recover. + // + CorruptObjectException coe; + coe = new CorruptObjectException(otp, ""); //$NON-NLS-1$ + coe.initCause(gone); + throw coe; + } + } + } + + // If we reached here, reuse wasn't possible. + // + if (otp.isDeltaRepresentation()) + writeDeltaObjectDeflate(out, otp); + else + writeWholeObjectDeflate(out, otp); + out.endObject(); + otp.setCRC((int) crc32.getValue()); + } + + private void writeBase(PackOutputStream out, ObjectToPack base) + throws IOException { + if (base != null && !base.isWritten() && !base.isEdge()) + writeObjectImpl(out, base); + } + + private void writeWholeObjectDeflate(PackOutputStream out, + final ObjectToPack otp) throws IOException { + final Deflater deflater = deflater(); + final ObjectLoader ldr = reader.open(otp, otp.getType()); + + crc32.reset(); + otp.setOffset(out.length()); + out.writeHeader(otp, ldr.getSize()); + + deflater.reset(); + DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); + ldr.copyTo(dst); + dst.finish(); + } + + private void writeDeltaObjectDeflate(PackOutputStream out, + final ObjectToPack otp) throws IOException { + writeBase(out, otp.getDeltaBase()); + + crc32.reset(); + otp.setOffset(out.length()); + + DeltaCache.Ref ref = otp.popCachedDelta(); + if (ref != null) { + byte[] zbuf = ref.get(); + if (zbuf != null) { + out.writeHeader(otp, otp.getCachedSize()); + out.write(zbuf); + return; + } + } + + TemporaryBuffer.Heap delta = delta(otp); + out.writeHeader(otp, delta.length()); + + Deflater deflater = deflater(); + deflater.reset(); + DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); + delta.writeTo(dst, null); + dst.finish(); + typeStats.cntDeltas++; + typeStats.deltaBytes += out.length() - otp.getOffset(); + } + + private TemporaryBuffer.Heap delta(final ObjectToPack otp) + throws IOException { + DeltaIndex index = new DeltaIndex(buffer(otp.getDeltaBaseId())); + byte[] res = buffer(otp); + + // We never would have proposed this pair if the delta would be + // larger than the unpacked version of the object. So using it + // as our buffer limit is valid: we will never reach it. + // + TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length); + index.encode(delta, res); + return delta; + } + + private byte[] buffer(AnyObjectId objId) throws IOException { + return buffer(config, reader, objId); + } + + static byte[] buffer(PackConfig config, ObjectReader or, AnyObjectId objId) + throws IOException { + // PackWriter should have already pruned objects that + // are above the big file threshold, so our chances of + // the object being below it are very good. We really + // shouldn't be here, unless the implementation is odd. 
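// Aside (illustrative sketch, not part of the patch): the delta() method
// above sizes its buffer from an invariant — a delta pairing is only ever
// proposed when the delta is smaller than the object's inflated form, so the
// raw length is a safe hard cap. A toy stand-in for that capping behaviour,
// assuming (not jgit's actual TemporaryBuffer.Heap) that overflow simply
// signals the encoder to give up:
import java.io.ByteArrayOutputStream;

class CappedHeapBuffer extends ByteArrayOutputStream {
	private final int limit;

	CappedHeapBuffer(int limit) {
		this.limit = limit;
	}

	@Override
	public synchronized void write(byte[] b, int off, int len) {
		if (size() + len > limit)
			// A delta that outgrows the raw object is never worth keeping.
			throw new IllegalStateException("delta exceeded raw object size");
		super.write(b, off, len);
	}

	@Override
	public synchronized void write(int b) {
		if (size() + 1 > limit)
			throw new IllegalStateException("delta exceeded raw object size");
		super.write(b);
	}
}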
+ + return or.open(objId).getCachedBytes(config.getBigFileThreshold()); + } + + private Deflater deflater() { + if (myDeflater == null) + myDeflater = new Deflater(config.getCompressionLevel()); + return myDeflater; + } + + private void writeChecksum(PackOutputStream out) throws IOException { + packcsum = out.getDigest(); + out.write(packcsum); + } + + private void findObjectsToPack(final ProgressMonitor countingMonitor, + final ObjectWalk walker, final Set want, + Set have) + throws MissingObjectException, IOException, + IncorrectObjectTypeException { + final long countingStart = System.currentTimeMillis(); + beginPhase(PackingPhase.COUNTING, countingMonitor, ProgressMonitor.UNKNOWN); + + if (have == null) + have = Collections.emptySet(); + + stats.interestingObjects = Collections.unmodifiableSet(new HashSet(want)); + stats.uninterestingObjects = Collections.unmodifiableSet(new HashSet(have)); + + walker.setRetainBody(false); + + canBuildBitmaps = config.isBuildBitmaps() + && !shallowPack + && have.isEmpty() + && (excludeInPacks == null || excludeInPacks.length == 0); + if (!shallowPack && useBitmaps) { + BitmapIndex bitmapIndex = reader.getBitmapIndex(); + if (bitmapIndex != null) { + PackWriterBitmapWalker bitmapWalker = new PackWriterBitmapWalker( + walker, bitmapIndex, countingMonitor); + findObjectsToPackUsingBitmaps(bitmapWalker, want, have); + endPhase(countingMonitor); + stats.timeCounting = System.currentTimeMillis() - countingStart; + return; + } + } + + List all = new ArrayList(want.size() + have.size()); + all.addAll(want); + all.addAll(have); + + final RevFlag include = walker.newFlag("include"); //$NON-NLS-1$ + final RevFlag added = walker.newFlag("added"); //$NON-NLS-1$ + + walker.carry(include); + + int haveEst = have.size(); + if (have.isEmpty()) { + walker.sort(RevSort.COMMIT_TIME_DESC); + } else { + walker.sort(RevSort.TOPO); + if (thin) + walker.sort(RevSort.BOUNDARY, true); + } + + List wantObjs = new ArrayList(want.size()); + List haveObjs = new ArrayList(haveEst); + List wantTags = new ArrayList(want.size()); + + AsyncRevObjectQueue q = walker.parseAny(all, true); + try { + for (;;) { + try { + RevObject o = q.next(); + if (o == null) + break; + if (have.contains(o)) + haveObjs.add(o); + if (want.contains(o)) { + o.add(include); + wantObjs.add(o); + if (o instanceof RevTag) + wantTags.add((RevTag) o); + } + } catch (MissingObjectException e) { + if (ignoreMissingUninteresting + && have.contains(e.getObjectId())) + continue; + throw e; + } + } + } finally { + q.release(); + } + + if (!wantTags.isEmpty()) { + all = new ArrayList(wantTags.size()); + for (RevTag tag : wantTags) + all.add(tag.getObject()); + q = walker.parseAny(all, true); + try { + while (q.next() != null) { + // Just need to pop the queue item to parse the object. 
+ } + } finally { + q.release(); + } + } + + if (walker instanceof DepthWalk.ObjectWalk) { + DepthWalk.ObjectWalk depthWalk = (DepthWalk.ObjectWalk) walker; + for (RevObject obj : wantObjs) + depthWalk.markRoot(obj); + if (unshallowObjects != null) { + for (ObjectId id : unshallowObjects) + depthWalk.markUnshallow(walker.parseAny(id)); + } + } else { + for (RevObject obj : wantObjs) + walker.markStart(obj); + } + for (RevObject obj : haveObjs) + walker.markUninteresting(obj); + + final int maxBases = config.getDeltaSearchWindowSize(); + Set baseTrees = new HashSet(); + BlockList commits = new BlockList(); + RevCommit c; + while ((c = walker.next()) != null) { + if (exclude(c)) + continue; + if (c.has(RevFlag.UNINTERESTING)) { + if (baseTrees.size() <= maxBases) + baseTrees.add(c.getTree()); + continue; + } + + commits.add(c); + countingMonitor.update(1); + } + + if (shallowPack) { + for (RevCommit cmit : commits) { + addObject(cmit, 0); + } + } else { + int commitCnt = 0; + boolean putTagTargets = false; + for (RevCommit cmit : commits) { + if (!cmit.has(added)) { + cmit.add(added); + addObject(cmit, 0); + commitCnt++; + } + + for (int i = 0; i < cmit.getParentCount(); i++) { + RevCommit p = cmit.getParent(i); + if (!p.has(added) && !p.has(RevFlag.UNINTERESTING) + && !exclude(p)) { + p.add(added); + addObject(p, 0); + commitCnt++; + } + } + + if (!putTagTargets && 4096 < commitCnt) { + for (ObjectId id : tagTargets) { + RevObject obj = walker.lookupOrNull(id); + if (obj instanceof RevCommit + && obj.has(include) + && !obj.has(RevFlag.UNINTERESTING) + && !obj.has(added)) { + obj.add(added); + addObject(obj, 0); + } + } + putTagTargets = true; + } + } + } + commits = null; + + if (thin && !baseTrees.isEmpty()) { + BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, // + objectsMap, edgeObjects, reader); + RevObject o; + while ((o = walker.nextObject()) != null) { + if (o.has(RevFlag.UNINTERESTING)) + continue; + if (exclude(o)) + continue; + + int pathHash = walker.getPathHashCode(); + byte[] pathBuf = walker.getPathBuffer(); + int pathLen = walker.getPathLength(); + bases.addBase(o.getType(), pathBuf, pathLen, pathHash); + addObject(o, pathHash); + countingMonitor.update(1); + } + } else { + RevObject o; + while ((o = walker.nextObject()) != null) { + if (o.has(RevFlag.UNINTERESTING)) + continue; + if (exclude(o)) + continue; + addObject(o, walker.getPathHashCode()); + countingMonitor.update(1); + } + } + + for (CachedPack pack : cachedPacks) + countingMonitor.update((int) pack.getObjectCount()); + endPhase(countingMonitor); + stats.timeCounting = System.currentTimeMillis() - countingStart; + } + + private void findObjectsToPackUsingBitmaps( + PackWriterBitmapWalker bitmapWalker, Set want, + Set have) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + BitmapBuilder haveBitmap = bitmapWalker.findObjects(have, null); + bitmapWalker.reset(); + BitmapBuilder wantBitmap = bitmapWalker.findObjects(want, haveBitmap); + BitmapBuilder needBitmap = wantBitmap.andNot(haveBitmap); + + if (useCachedPacks && reuseSupport != null + && (excludeInPacks == null || excludeInPacks.length == 0)) + cachedPacks.addAll( + reuseSupport.getCachedPacksAndUpdate(needBitmap)); + + for (BitmapObject obj : needBitmap) { + ObjectId objectId = obj.getObjectId(); + if (exclude(objectId)) { + needBitmap.remove(objectId); + continue; + } + addObject(objectId, obj.getType(), 0); + } + + if (thin) + haveObjects = haveBitmap; + } + + private static void pruneEdgesFromObjectList(List list) 
{ + final int size = list.size(); + int src = 0; + int dst = 0; + + for (; src < size; src++) { + ObjectToPack obj = list.get(src); + if (obj.isEdge()) + continue; + if (dst != src) + list.set(dst, obj); + dst++; + } + + while (dst < list.size()) + list.remove(list.size() - 1); + } + + /** + * Include one object to the output file. + *
+ * Objects are written in the order they are added. If the same object is + * added twice, it may be written twice, creating a larger than necessary + * file. + * + * @param object + * the object to add. + * @throws IncorrectObjectTypeException + * the object is an unsupported type. + */ + public void addObject(final RevObject object) + throws IncorrectObjectTypeException { + if (!exclude(object)) + addObject(object, 0); + } + + private void addObject(final RevObject object, final int pathHashCode) { + addObject(object, object.getType(), pathHashCode); + } + + private void addObject( + final AnyObjectId src, final int type, final int pathHashCode) { + final ObjectToPack otp; + if (reuseSupport != null) + otp = reuseSupport.newObjectToPack(src, type); + else + otp = new ObjectToPack(src, type); + otp.setPathHash(pathHashCode); + objectsLists[type].add(otp); + objectsMap.add(otp); + } + + private boolean exclude(AnyObjectId objectId) { + if (excludeInPacks == null) + return false; + if (excludeInPackLast.contains(objectId)) + return true; + for (ObjectIdSet idx : excludeInPacks) { + if (idx.contains(objectId)) { + excludeInPackLast = idx; + return true; + } + } + return false; + } + + /** + * Select an object representation for this writer. + *
+ * An {@link ObjectReader} implementation should invoke this method once for + * each representation available for an object, to allow the writer to find + * the most suitable one for the output. + * + * @param otp + * the object being packed. + * @param next + * the next available representation from the repository. + */ + public void select(ObjectToPack otp, StoredObjectRepresentation next) { + int nFmt = next.getFormat(); + + if (!cachedPacks.isEmpty()) { + if (otp.isEdge()) + return; + if ((nFmt == PACK_WHOLE) | (nFmt == PACK_DELTA)) { + for (CachedPack pack : cachedPacks) { + if (pack.hasObject(otp, next)) { + otp.setEdge(); + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + pruneCurrentObjectList = true; + return; + } + } + } + } + + if (nFmt == PACK_DELTA && reuseDeltas && reuseDeltaFor(otp)) { + ObjectId baseId = next.getDeltaBase(); + ObjectToPack ptr = objectsMap.get(baseId); + if (ptr != null && !ptr.isEdge()) { + otp.setDeltaBase(ptr); + otp.setReuseAsIs(); + } else if (thin && have(ptr, baseId)) { + otp.setDeltaBase(baseId); + otp.setReuseAsIs(); + } else { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + } + } else if (nFmt == PACK_WHOLE && config.isReuseObjects()) { + int nWeight = next.getWeight(); + if (otp.isReuseAsIs() && !otp.isDeltaRepresentation()) { + // We've chosen another PACK_WHOLE format for this object, + // choose the one that has the smaller compressed size. + // + if (otp.getWeight() <= nWeight) + return; + } + otp.clearDeltaBase(); + otp.setReuseAsIs(); + otp.setWeight(nWeight); + } else { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + } + + otp.setDeltaAttempted(reuseDeltas & next.wasDeltaAttempted()); + otp.select(next); + } + + private final boolean have(ObjectToPack ptr, AnyObjectId objectId) { + return (ptr != null && ptr.isEdge()) + || (haveObjects != null && haveObjects.contains(objectId)); + } + + /** + * Prepares the bitmaps to be written to the pack index. Bitmaps can be used + * to speed up fetches and clones by storing the entire object graph at + * selected commits. + * + * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. Writing a corresponding bitmap + * index is an optional feature that not all pack users may require. + * + * @param pm + * progress monitor to report bitmap building work. + * @return whether a bitmap index may be written. + * @throws IOException + * when some I/O problem occur during reading objects. 
+ */ + public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { + if (!canBuildBitmaps || getObjectCount() > Integer.MAX_VALUE + || !cachedPacks.isEmpty()) + return false; + + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + + writeBitmaps = new PackBitmapIndexBuilder(sortByName()); + PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer( + reader, writeBitmaps, pm, stats.interestingObjects); + + int numCommits = objectsLists[OBJ_COMMIT].size(); + Collection selectedCommits = + bitmapPreparer.doCommitSelection(numCommits); + + beginPhase(PackingPhase.BUILDING_BITMAPS, pm, selectedCommits.size()); + + PackWriterBitmapWalker walker = bitmapPreparer.newBitmapWalker(); + AnyObjectId last = null; + for (PackWriterBitmapPreparer.BitmapCommit cmit : selectedCommits) { + if (cmit.isReuseWalker()) + walker.reset(); + else + walker = bitmapPreparer.newBitmapWalker(); + + BitmapBuilder bitmap = walker.findObjects( + Collections.singleton(cmit), null); + + if (last != null && cmit.isReuseWalker() && !bitmap.contains(last)) + throw new IllegalStateException(MessageFormat.format( + JGitText.get().bitmapMissingObject, cmit.name(), + last.name())); + last = cmit; + writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags()); + + pm.update(1); + } + + endPhase(pm); + return true; + } + + private boolean reuseDeltaFor(ObjectToPack otp) { + int type = otp.getType(); + if ((type & 2) != 0) // OBJ_TREE(2) or OBJ_BLOB(3) + return true; + if (type == OBJ_COMMIT) + return reuseDeltaCommits; + if (type == OBJ_TAG) + return false; + return true; + } + + /** Summary of how PackWriter created the pack. */ + public static class Statistics { + /** Statistics about a single class of object. */ + public static class ObjectType { + long cntObjects; + + long cntDeltas; + + long reusedObjects; + + long reusedDeltas; + + long bytes; + + long deltaBytes; + + /** + * @return total number of objects output. This total includes the + * value of {@link #getDeltas()}. + */ + public long getObjects() { + return cntObjects; + } + + /** + * @return total number of deltas output. This may be lower than the + * actual number of deltas if a cached pack was reused. + */ + public long getDeltas() { + return cntDeltas; + } + + /** + * @return number of objects whose existing representation was + * reused in the output. This count includes + * {@link #getReusedDeltas()}. + */ + public long getReusedObjects() { + return reusedObjects; + } + + /** + * @return number of deltas whose existing representation was reused + * in the output, as their base object was also output or + * was assumed present for a thin pack. This may be lower + * than the actual number of reused deltas if a cached pack + * was reused. + */ + public long getReusedDeltas() { + return reusedDeltas; + } + + /** + * @return total number of bytes written. This size includes the + * object headers as well as the compressed data. This size + * also includes all of {@link #getDeltaBytes()}. + */ + public long getBytes() { + return bytes; + } + + /** + * @return number of delta bytes written. This size includes the + * object headers for the delta objects. 
+ */ + public long getDeltaBytes() { + return deltaBytes; + } + } + + Set interestingObjects; + + Set uninterestingObjects; + + Collection reusedPacks; + + int depth; + + int deltaSearchNonEdgeObjects; + + int deltasFound; + + long totalObjects; + + long totalDeltas; + + long reusedObjects; + + long reusedDeltas; + + long totalBytes; + + long thinPackBytes; + + long timeCounting; + + long timeSearchingForReuse; + + long timeSearchingForSizes; + + long timeCompressing; + + long timeWriting; + + ObjectType[] objectTypes; + + { + objectTypes = new ObjectType[5]; + objectTypes[OBJ_COMMIT] = new ObjectType(); + objectTypes[OBJ_TREE] = new ObjectType(); + objectTypes[OBJ_BLOB] = new ObjectType(); + objectTypes[OBJ_TAG] = new ObjectType(); + } + + /** + * @return unmodifiable collection of objects to be included in the + * pack. May be null if the pack was hand-crafted in a unit + * test. + */ + public Set getInterestingObjects() { + return interestingObjects; + } + + /** + * @return unmodifiable collection of objects that should be excluded + * from the pack, as the peer that will receive the pack already + * has these objects. + */ + public Set getUninterestingObjects() { + return uninterestingObjects; + } + + /** + * @return unmodifiable collection of the cached packs that were reused + * in the output, if any were selected for reuse. + */ + public Collection getReusedPacks() { + return reusedPacks; + } + + /** + * @return number of objects in the output pack that went through the + * delta search process in order to find a potential delta base. + */ + public int getDeltaSearchNonEdgeObjects() { + return deltaSearchNonEdgeObjects; + } + + /** + * @return number of objects in the output pack that went through delta + * base search and found a suitable base. This is a subset of + * {@link #getDeltaSearchNonEdgeObjects()}. + */ + public int getDeltasFound() { + return deltasFound; + } + + /** + * @return total number of objects output. This total includes the value + * of {@link #getTotalDeltas()}. + */ + public long getTotalObjects() { + return totalObjects; + } + + /** + * @return total number of deltas output. This may be lower than the + * actual number of deltas if a cached pack was reused. + */ + public long getTotalDeltas() { + return totalDeltas; + } + + /** + * @return number of objects whose existing representation was reused in + * the output. This count includes {@link #getReusedDeltas()}. + */ + public long getReusedObjects() { + return reusedObjects; + } + + /** + * @return number of deltas whose existing representation was reused in + * the output, as their base object was also output or was + * assumed present for a thin pack. This may be lower than the + * actual number of reused deltas if a cached pack was reused. + */ + public long getReusedDeltas() { + return reusedDeltas; + } + + /** + * @return total number of bytes written. This size includes the pack + * header, trailer, thin pack, and reused cached pack(s). + */ + public long getTotalBytes() { + return totalBytes; + } + + /** + * @return size of the thin pack in bytes, if a thin pack was generated. + * A thin pack is created when the client already has objects + * and some deltas are created against those objects, or if a + * cached pack is being used and some deltas will reference + * objects in the cached pack. This size does not include the + * pack header or trailer. + */ + public long getThinPackBytes() { + return thinPackBytes; + } + + /** + * @param typeCode + * object type code, e.g. OBJ_COMMIT or OBJ_TREE. 
+ * @return information about this type of object in the pack. + */ + public ObjectType byObjectType(int typeCode) { + return objectTypes[typeCode]; + } + + /** @return true if the resulting pack file was a shallow pack. */ + public boolean isShallow() { + return depth > 0; + } + + /** @return depth (in commits) the pack includes if shallow. */ + public int getDepth() { + return depth; + } + + /** + * @return time in milliseconds spent enumerating the objects that need + * to be included in the output. This time includes any restarts + * that occur when a cached pack is selected for reuse. + */ + public long getTimeCounting() { + return timeCounting; + } + + /** + * @return time in milliseconds spent matching existing representations + * against objects that will be transmitted, or that the client + * can be assumed to already have. + */ + public long getTimeSearchingForReuse() { + return timeSearchingForReuse; + } + + /** + * @return time in milliseconds spent finding the sizes of all objects + * that will enter the delta compression search window. The + * sizes need to be known to better match similar objects + * together and improve delta compression ratios. + */ + public long getTimeSearchingForSizes() { + return timeSearchingForSizes; + } + + /** + * @return time in milliseconds spent on delta compression. This is + * observed wall-clock time and does not accurately track CPU + * time used when multiple threads were used to perform the + * delta compression. + */ + public long getTimeCompressing() { + return timeCompressing; + } + + /** + * @return time in milliseconds spent writing the pack output, from + * start of header until end of trailer. The transfer speed can + * be approximated by dividing {@link #getTotalBytes()} by this + * value. + */ + public long getTimeWriting() { + return timeWriting; + } + + /** @return total time spent processing this pack. */ + public long getTimeTotal() { + return timeCounting + + timeSearchingForReuse + + timeSearchingForSizes + + timeCompressing + + timeWriting; + } + + /** + * @return get the average output speed in terms of bytes-per-second. + * {@code getTotalBytes() / (getTimeWriting() / 1000.0)}. + */ + public double getTransferRate() { + return getTotalBytes() / (getTimeWriting() / 1000.0); + } + + /** @return formatted message string for display to clients. */ + public String getMessage() { + return MessageFormat.format(JGitText.get().packWriterStatistics, // + Long.valueOf(totalObjects), Long.valueOf(totalDeltas), // + Long.valueOf(reusedObjects), Long.valueOf(reusedDeltas)); + } + } + + private class MutableState { + /** Estimated size of a single ObjectToPack instance. */ + // Assume 64-bit pointers, since this is just an estimate. 
+ private static final long OBJECT_TO_PACK_SIZE = + (2 * 8) // Object header + + (2 * 8) + (2 * 8) // ObjectToPack fields + + (8 + 8) // PackedObjectInfo fields + + 8 // ObjectIdOwnerMap fields + + 40 // AnyObjectId fields + + 8; // Reference in BlockList + + private final long totalDeltaSearchBytes; + + private volatile PackingPhase phase; + + MutableState() { + phase = PackingPhase.COUNTING; + if (config.isDeltaCompress()) { + int threads = config.getThreads(); + if (threads <= 0) + threads = Runtime.getRuntime().availableProcessors(); + totalDeltaSearchBytes = (threads * config.getDeltaSearchMemoryLimit()) + + config.getBigFileThreshold(); + } else + totalDeltaSearchBytes = 0; + } + + State snapshot() { + long objCnt = 0; + objCnt += objectsLists[OBJ_COMMIT].size(); + objCnt += objectsLists[OBJ_TREE].size(); + objCnt += objectsLists[OBJ_BLOB].size(); + objCnt += objectsLists[OBJ_TAG].size(); + // Exclude CachedPacks. + + long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt; + PackingPhase curr = phase; + if (curr == PackingPhase.COMPRESSING) + bytesUsed += totalDeltaSearchBytes; + return new State(curr, bytesUsed); + } + } + + /** Possible states that a PackWriter can be in. */ + public static enum PackingPhase { + /** Counting objects phase. */ + COUNTING, + + /** Getting sizes phase. */ + GETTING_SIZES, + + /** Finding sources phase. */ + FINDING_SOURCES, + + /** Compressing objects phase. */ + COMPRESSING, + + /** Writing objects phase. */ + WRITING, + + /** Building bitmaps phase. */ + BUILDING_BITMAPS; + } + + /** Summary of the current state of a PackWriter. */ + public class State { + private final PackingPhase phase; + + private final long bytesUsed; + + State(PackingPhase phase, long bytesUsed) { + this.phase = phase; + this.bytesUsed = bytesUsed; + } + + /** @return the PackConfig used to build the writer. */ + public PackConfig getConfig() { + return config; + } + + /** @return the current phase of the writer. */ + public PackingPhase getPhase() { + return phase; + } + + /** @return an estimate of the total memory used by the writer. */ + public long estimateBytesUsed() { + return bytesUsed; + } + + @SuppressWarnings("nls") + @Override + public String toString() { + return "PackWriter.State[" + phase + ", memory=" + bytesUsed + "]"; + } + } +}
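// Aside (illustrative sketch, not part of the patch): a worked check of
// MutableState's estimate above. The per-object constant sums to
// (2*8) + (2*8) + (2*8) + (8+8) + 8 + 40 + 8 = 120 bytes, and while in the
// COMPRESSING phase each thread may additionally pin a full delta search
// window plus one big object. The config values below are assumed for
// illustration only.
public class PackMemoryEstimateSketch {
	static final long OBJECT_TO_PACK_SIZE =
			(2 * 8) + (2 * 8) + (2 * 8) + (8 + 8) + 8 + 40 + 8; // 120 bytes

	public static void main(String[] args) {
		long objCnt = 1_000_000L;                // one million queued objects
		int threads = 4;                         // assumed thread count
		long deltaSearchMemoryLimit = 10L << 20; // 10 MiB per thread, assumed
		long bigFileThreshold = 50L << 20;       // 50 MiB, assumed

		long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt; // 120,000,000 bytes of bookkeeping
		// Mirrors MutableState.totalDeltaSearchBytes for the COMPRESSING phase.
		bytesUsed += threads * deltaSearchMemoryLimit + bigFileThreshold;
		System.out.println(bytesUsed + " bytes estimated"); // 214371840
	}
}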