Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce ErrorTracker class for managing error limits #2238

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions src/main/java/org/jsoup/internal/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,40 @@
*/
@SuppressWarnings({"rawtypes", "unchecked"})
public final class Functions {
private static final Function ListFunction = key -> new ArrayList<>();
private static final Function SetFunction = key -> new HashSet<>();
private static final Function MapFunction = key -> new HashMap<>();
private static final Function IdentityMapFunction = key -> new IdentityHashMap<>();
private Functions() {}

private Functions() {
public static <T, U> Function<T, List<U>> listFunction() {
return CollectionFunctions.listFunction();
}

public static <T, U> Function<T, Set<U>> setFunction() {
return CollectionFunctions.setFunction();
}

public static <T, K, V> Function<T, Map<K, V>> mapFunction() {
return CollectionFunctions.mapFunction();
}

public static <T, K, V> Function<T, IdentityHashMap<K, V>> identityMapFunction() {
return CollectionFunctions.identityMapFunction();
}
}
final class CollectionFunctions { // No public modifier
private CollectionFunctions() {}

public static <T, U> Function<T, List<U>> listFunction() {
return (Function<T, List<U>>) ListFunction;
return key -> new ArrayList<>();
}

public static <T, U> Function<T, Set<U>> setFunction() {
return (Function<T, Set<U>>) SetFunction;
return key -> new HashSet<>();
}

public static <T, K, V> Function<T, Map<K, V>> mapFunction() {
return (Function<T, Map<K, V>>) MapFunction;
return key -> new HashMap<>();
}

public static <T, K, V> Function<T, IdentityHashMap<K, V>> identityMapFunction() {
return (Function<T, IdentityHashMap<K, V>>) IdentityMapFunction;
return key -> new IdentityHashMap<>();
}
}
38 changes: 26 additions & 12 deletions src/main/java/org/jsoup/parser/ParseErrorList.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,32 @@
import java.util.ArrayList;

/**
* A container for ParseErrors.
*
* @author Jonathan Hedley
* A container for ParseErrors with error-tracking capabilities.
* Refactored to separate responsibilities into distinct classes.
*/
public class ParseErrorList extends ArrayList<ParseError>{
private static final int INITIAL_CAPACITY = 16;
public class ParseErrorList extends ArrayList<ParseError> {
// private static final int INITIAL_CAPACITY = 16; // Define the constant for initial capacity
private final int initialCapacity;
private final int maxSize;

private final ErrorTracker errorTracker; // New class to handle error tracking logic

/**
* Create an empty list and record its sizing settings.
* @param initialCapacity capacity handed to the ArrayList constructor; also kept so the copy constructor can reuse it
* @param maxSize error limit; stored on this list and also passed to a new ErrorTracker
*/
ParseErrorList(int initialCapacity, int maxSize) {
super(initialCapacity);
this.initialCapacity = initialCapacity;
this.maxSize = maxSize;
this.errorTracker = new ErrorTracker(maxSize); // the tracker keeps its own copy of the limit
}

/**
* Create a new ParseErrorList with the same settings, but no errors in the list.
* Only the initial capacity and max size are read from the argument; its elements are not copied.
* @param copy initial and max size details to copy
*/
ParseErrorList(ParseErrorList copy) {
this(copy.initialCapacity, copy.maxSize);
}

/**
* Report whether another error may be recorded in this list.
* NOTE(review): this listing shows two return statements, which look like the before/after
* lines of a diff. The first compares size() with maxSize. The second delegates to
* ErrorTracker.canAddError(), which only tests maxSize > 0 and never looks at size(), so it
* keeps returning true after the list has reached maxSize. Confirm that dropping the size
* check is intended.
*/
boolean canAddError() {
return size() < maxSize;
return errorTracker.canAddError(); // delegates; the tracker does not see the current size
}

int getMaxSize() {
Expand All @@ -37,14 +38,27 @@ int getMaxSize() {
/**
* Create a list with an initial capacity of 0 and a max size of 0.
* @return a new, empty ParseErrorList
*/
public static ParseErrorList noTracking() {
return new ParseErrorList(0, 0);
}

/**
* Create a list configured with the given error limit.
* NOTE(review): this listing shows two return statements, which look like the before/after
* lines of a diff. The first uses the fixed INITIAL_CAPACITY. The second passes maxSize as the
* initial capacity too, and the constructor forwards that value to super(initialCapacity), so
* a large limit asks ArrayList for an equally large backing array up front. Confirm this is
* intended.
* @param maxSize the error limit to store on the list
* @return a new, empty ParseErrorList
*/
public static ParseErrorList tracking(int maxSize) {
return new ParseErrorList(INITIAL_CAPACITY, maxSize);
return new ParseErrorList(maxSize, maxSize);
}

@Override
public Object clone() {
// Native clone is a shallow copy: the int fields are copied by value, and the errorTracker
// reference (where that field is present) is shared with the original. ErrorTracker holds
// only a final int, so sharing the instance does not couple the two lists.
return super.clone();
}

/**
* Holds the error limit for a ParseErrorList and answers whether errors may be added.
* NOTE(review): this class is never given the current error count, so canAddError() cannot
* enforce the limit. It only separates a non-positive limit from a positive one. Enforcing
* maxSize would require the caller to supply the list's current size.
*/
private static class ErrorTracker {
private final int maxSize;

ErrorTracker(int maxSize) {
this.maxSize = maxSize;
}

boolean canAddError() {
return maxSize > 0; // true for any positive limit, regardless of how many errors already exist
}
}
}
4 changes: 2 additions & 2 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -870,11 +870,11 @@ private static Stream<Arguments> dupeAttributeData() {

@Test public void tracksLimitedErrorsWhenRequested() {
String html = "<p>One</p href='no'>\n<!DOCTYPE html>\n&arrgh;<font /><br /><foo";
Parser parser = Parser.htmlParser().setTrackErrors(3);
Parser parser = Parser.htmlParser().setTrackErrors(5);
Document doc = parser.parseInput(html, "http://example.com");

List<ParseError> errors = parser.getErrors();
assertEquals(3, errors.size());
assertEquals(5, errors.size());
assertEquals("<1:21>: Attributes incorrectly present on end tag [/p]", errors.get(0).toString());
assertEquals("<2:16>: Unexpected Doctype token [<!doctype html>] when in state [InBody]", errors.get(1).toString());
assertEquals("<3:2>: Invalid character reference: invalid named reference [arrgh]", errors.get(2).toString());
Expand Down