From: Geneviève Bastien Date: Wed, 15 Feb 2017 17:23:20 +0000 (-0500) Subject: datastore: Add API for single queries X-Git-Url: http://drtracing.org/?a=commitdiff_plain;h=54d250a3b5e7f5342b4fda0ab078767bfdedcc69;p=deliverable%2Ftracecompass.git datastore: Add API for single queries When what we want is only one interval, there is no need to get them all, we can just fast return when the one interval is found. This change alones reduces the performance impact of the datastore by ~2x for single queries in the case of smallish traces. This patch also adds a binary search to find the first interval to look at. Change-Id: I77d0ac9818f272f7d160f92666445ce2d8b3200a Signed-off-by: Geneviève Bastien Reviewed-on: https://git.eclipse.org/r/91753 Reviewed-by: Hudson CI Reviewed-by: Loic Prieur-Drevon --- diff --git a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/AbstractHistoryTree.java b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/AbstractHistoryTree.java index 19e1228798..598aa0ed7c 100644 --- a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/AbstractHistoryTree.java +++ b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/AbstractHistoryTree.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Predicate; import org.eclipse.jdt.annotation.NonNull; +import org.eclipse.jdt.annotation.Nullable; import org.eclipse.tracecompass.common.core.NonNullUtils; import org.eclipse.tracecompass.internal.datastore.core.historytree.HtIo; import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition; @@ -908,6 +909,36 @@ public abstract class AbstractHistoryTree timeCondition, + Predicate extraPredicate) { + + /* Queue a stack of nodes containing nodes intersecting t */ + Deque queue = new LinkedList<>(); + /* We start by reading the information in the root node */ + queue.add(getRootNode().getSequenceNumber()); + + /* Then we follow the down in the relevant children until we find the interval */ + try { + while (!queue.isEmpty()) { + int sequenceNumber = queue.pop(); + HTNode currentNode = readNode(sequenceNumber); + + @Nullable E interval = currentNode.getMatchingInterval(timeCondition, extraPredicate); + if (interval != null) { + return interval; + } + + if (currentNode.getNodeType() == HTNode.NodeType.CORE) { + /* Here we add the relevant children nodes for BFS */ + queue.addAll(currentNode.selectNextChildren(timeCondition)); + } + } + } catch (ClosedChannelException e) { + } + return null; + } + @Override public String toString() { return "Information on the current tree:\n\n" + "Blocksize: " //$NON-NLS-1$ //$NON-NLS-2$ diff --git a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/HTNode.java b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/HTNode.java index d26a33c139..2d6054f9d2 100644 --- a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/HTNode.java +++ b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/HTNode.java @@ -37,7 +37,6 @@ import org.eclipse.tracecompass.internal.provisional.datastore.core.serializatio import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; /** * The base class for all the types of nodes that go in the History Tree. @@ -579,6 +578,9 @@ public class HTNode implements IHTNode { * Sub-classes may override this to change or specify the interval * comparator. * + * NOTE: sub-classes who override this may also need to override the + * {@link #getStartIndexFor(RangeCondition, Predicate)}. + * * @return The way intervals are to be sorted in this node */ protected Comparator getIntervalComparator() { @@ -681,32 +683,114 @@ public class HTNode implements IHTNode { public Iterable getMatchingIntervals(RangeCondition timeCondition, Predicate extraPredicate) { - // TODO Use getIntervalComparator() to restrict the dataset further // TODO Benchmark using/returning streams instead of iterables if (isOnDisk()) { - @NonNull Iterable ret = fIntervals; - ret = Iterables.filter(ret, interval -> timeCondition.intersects(interval.getStart(), interval.getEnd())); - ret = Iterables.filter(ret, interval -> extraPredicate.test(interval)); - return ret; + return doGetMatchingIntervals(timeCondition, extraPredicate); } takeReadLock(); try { - return fIntervals.stream() - .filter(interval -> timeCondition.intersects(interval.getStart(), interval.getEnd())) - .filter(extraPredicate) - /* - * Because this class works with read locks, we can't - * return a lazy stream unfortunately. Room for improvement? - */ - .collect(Collectors.toList()); + return doGetMatchingIntervals(timeCondition, extraPredicate); } finally { releaseReadLock(); } } + @Override + public @Nullable E getMatchingInterval(RangeCondition timeCondition, Predicate extraPredicate) { + if (isOnDisk()) { + return doGetMatchingInterval(timeCondition, extraPredicate); + } + + takeReadLock(); + try { + return doGetMatchingInterval(timeCondition, extraPredicate); + + } finally { + releaseReadLock(); + } + } + + private Iterable doGetMatchingIntervals(RangeCondition timeCondition, + Predicate extraPredicate) { + List list = new ArrayList<>(); + for (int i = getStartIndexFor(timeCondition, extraPredicate); i < fIntervals.size(); i++) { + E curInterval = fIntervals.get(i); + if (timeCondition.intersects(curInterval.getStart(), curInterval.getEnd()) && + extraPredicate.test(curInterval)) { + list.add(curInterval); + } + } + return list; + } + + private @Nullable E doGetMatchingInterval(RangeCondition timeCondition, + Predicate extraPredicate) { + for (int i = getStartIndexFor(timeCondition, extraPredicate); i < fIntervals.size(); i++) { + E curInterval = fIntervals.get(i); + if (timeCondition.intersects(curInterval.getStart(), curInterval.getEnd()) && + extraPredicate.test(curInterval)) { + return curInterval; + } + } + return null; + } + + /** + * Get the start index. It will skip all the intervals that end before the + * beginning of the time range. + * + * NOTE: This method goes with the comparator. If a tree overrides the + * default comparator (that sorts first by end time), then it will need to + * override this method. Here, the binary search is re-implemented because + * the interval type is generic but a tree for a concrete type could use a + * binary search instead. + * + * @param timeCondition + * The time-based RangeCondition + * @param extraPredicate + * Extra predicate to run on the elements. + * @return The index of the first interval greater than or equal to the + * conditions in parameter + */ + protected int getStartIndexFor(RangeCondition timeCondition, Predicate extraPredicate) { + if (fIntervals.isEmpty()) { + return 0; + } + /* + * Implement our own binary search since the intervals are generic, we + * cannot make a dummy interval and find its position + */ + int low = 0; + int high = fIntervals.size() - 1; + long target = timeCondition.min(); + + while (low <= high) { + // Divide the sum by 2 + int mid = (low + high) >>> 1; + E midVal = fIntervals.get(mid); + long end = midVal.getEnd(); + + if (end < target) { + low = mid + 1; + } else if (end > target) { + high = mid - 1; + } else { + // key found, rewind to see where it starts + while (end == target && mid > 0) { + mid = mid - 1; + midVal = fIntervals.get(mid); + end = midVal.getEnd(); + } + // if end == target, then mid is 0 + return (end == target ? mid : mid + 1); // key found + } + } + return low; // key not found + } + @Override public void closeThisNode(long endtime) { fRwl.writeLock().lock(); @@ -985,4 +1069,5 @@ public class HTNode implements IHTNode { public void debugPrintIntervals(PrintStream writer) { getIntervals().forEach(writer::println); } + } diff --git a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHTNode.java b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHTNode.java index 2106382eba..c15a7f120d 100644 --- a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHTNode.java +++ b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHTNode.java @@ -15,6 +15,7 @@ import java.util.Collection; import java.util.Collections; import java.util.function.Predicate; +import org.eclipse.jdt.annotation.Nullable; import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition; import org.eclipse.tracecompass.internal.provisional.datastore.core.exceptions.RangeException; import org.eclipse.tracecompass.internal.provisional.datastore.core.interval.IHTInterval; @@ -168,6 +169,20 @@ public interface IHTNode { Iterable getMatchingIntervals(RangeCondition timeCondition, Predicate extraPredicate); + /** + * Retrieve an interval inside this node that matches the given conditions. + * + * @param timeCondition + * The time-based RangeCondition + * @param extraPredicate + * Extra predicate to run on the elements. Only intervals also + * matching this predicate will be returned. + * @return An interval matching the conditions or null if no + * interval was found + */ + @Nullable E getMatchingInterval(RangeCondition timeCondition, + Predicate extraPredicate); + /** * Return the total header size of this node (will depend on the node type). * diff --git a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHistoryTree.java b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHistoryTree.java index 76614371d3..3e2d8e6d69 100644 --- a/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHistoryTree.java +++ b/statesystem/org.eclipse.tracecompass.datastore.core/src/org/eclipse/tracecompass/internal/provisional/datastore/core/historytree/IHistoryTree.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.nio.channels.ClosedChannelException; import java.util.function.Predicate; +import org.eclipse.jdt.annotation.Nullable; import org.eclipse.tracecompass.internal.provisional.datastore.core.condition.RangeCondition; import org.eclipse.tracecompass.internal.provisional.datastore.core.exceptions.RangeException; import org.eclipse.tracecompass.internal.provisional.datastore.core.interval.IHTInterval; @@ -150,6 +151,24 @@ public interface IHistoryTree { Iterable getMatchingIntervals(RangeCondition timeCondition, Predicate extraPredicate); + /** + * Query the tree to retrieve an interval matching the given conditions. + * + * @param timeCondition + * Time-based RangeCondition, can represent a single timestamp, a + * series of punctual timestamps, or a time range. + * @param extraPredicate + * Extra check to run on the elements to determine if they should + * be returned or not. This will be checked at the node level, so + * if it's known in advance it might be advantageous to pass it + * here rather than checking it yourself on the returned + * Iterable. + * @return An interval matching the given conditions, or null + * if no interval was found. + */ + @Nullable E getMatchingInterval(RangeCondition timeCondition, + Predicate extraPredicate); + // ------------------------------------------------------------------------ // Attribute-tree reading/writing operations //