diff --git a/pom.xml b/pom.xml index 9f8e118..4dbc741 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 edu.byu.cs240 checkstyle - 1.0.8 + 1.0.9 C S 240 Checkstyle Checks 21 diff --git a/src/main/java/edu/byu/cs240/checkstyle/readability/CommentedCode.java b/src/main/java/edu/byu/cs240/checkstyle/readability/CommentedCode.java index 0d9e24b..d5ab9f7 100644 --- a/src/main/java/edu/byu/cs240/checkstyle/readability/CommentedCode.java +++ b/src/main/java/edu/byu/cs240/checkstyle/readability/CommentedCode.java @@ -4,7 +4,8 @@ import com.puppycrawl.tools.checkstyle.api.DetailAST; import com.puppycrawl.tools.checkstyle.api.TokenTypes; -import java.util.Set; +import java.util.*; +import java.util.regex.Pattern; /** * Reports successive lines of commented out code @@ -13,13 +14,55 @@ */ public class CommentedCode extends AbstractCheck { - private static final Set CODE_LINE_END_CHARS = Set.of(';', ',', '{', '}', '(', ')', '/'); + private static final Map ENTIRE_COMMENT_CODE_PATTERNS = new HashMap<>(); + private static final Map SINGLE_LINE_CODE_PATTERNS = new HashMap<>(); + private static final Map ENTIRE_COMMENT_NONCODE_PATTERNS = new HashMap<>(); + private static final Map SINGLE_LINE_NONCODE_PATTERNS = new HashMap<>(); + + private static final Set JAVA_RESERVED_WORDS = + Set.of("abstract", "continue", "for", "new", "switch", "assert", "default", "package", "synchronized", + "boolean", "do", "if", "private", "this", "break", "double", "implements", "protected", "throw", + "byte", "else", "import", "public", "throws", "case", "enum", "instanceof", "return", "transient", + "catch", "extends", "int", "short", "try", "char", "final", "interface", "static", "void", "class", + "finally", "long", "strictfp", "volatile", "const", "float", "native", "super", "while"); + + static { + ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("[\\s\\S]*(?:if|for|while)\\s*\\([\\s\\S]+\\)\\s*\\{[\\s\\S]*}[\\s\\S]*"), 0.9); //simple if/for/while + ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("[\\s\\S]*if\\s*\\([\\s\\S]+\\)\\s*\\{[\\s\\S]*}\\s*else(?:\\sif\\s*\\([\\s\\S]+\\))?\\s*\\{[\\s\\S]*}[\\s\\S]*"), 0.95); //if/else or if/elseif + ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("^(?:[\\s\\S]*\\n\\s*)?(?:private|public|protected)?\\s+\\w+\\s+\\w+\\s*\\([\\s\\S]*\\)\\s*\\{[\\s\\S]*}[\\s\\S]*$"), 0.9); //method declaration + + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^(?:if|while)\\s*\\([\\s\\S]+\\)\\s*\\{?\\s$"), 0.85); //simple if/while + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^for\\s*\\([\\s\\S]*;[\\s\\S]*;[\\s\\S]*\\)\\s*\\{?\\s$"), 0.9); //start of for loop + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+\\s+\\w+\\s*=[\\s\\S]*$"), 0.9); //variable declaration + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+\\s*(?:\\+|-|\\*|/|%|&|\\^|\\||<<|>>|>>>)?=[\\s\\S]*$"), 0.85); //variable assignment + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^(?:\\w+(?:\\(.*\\))?\\.)*\\w+\\(.*\\);$"), 0.85); //method call + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^return\\s+.*;$"), 0.9); // return statements + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+(?:--|\\+\\+);?$"), 0.9); //increment/decrement + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^.*;$"), 0.8); //end semicolon + SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^}$"), 0.6); //only end curly + + + ENTIRE_COMMENT_NONCODE_PATTERNS.put(Pattern.compile("^(?:\\*.*\n)*\\*.*?$"), 0.85); //Javadoc comment + ENTIRE_COMMENT_NONCODE_PATTERNS.put(Pattern.compile("^\\{(?:\\s*\"\\w+\":\\s*(?:\"?\\w+\"?|\"\"|\\[[\\s\\S]*]|\\{[\\s\\S]*}),?\\s*)+}$"), 0.75); //JSON object + + SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\\*.*$"), 0.65); //start of javadoc comment line + SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\\*\\s+@(?:param|return|throws)\\s+.*$"), 0.8); //start of javadoc comment line + SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\"\\w+\":\\s*(?:\"?[\\w: ]+\"?|\"\"),?$"), 0.65); //individual line of json field + SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^(?i)(?:TODO|FIXME):.*$"), 0.8); + SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^NOTE:.*$"), 0.7); + } private int min = 5; - private int firstGroupCommentedLine; + private double minConfidence = 0.5; + + private int entireCommentWeight = 15; + private int singleLineWeight = 7; + private int reservedWordWeight = 2; + + private int minWords = 10; - private int numSuccessiveLines; + private final List commentNodes = new ArrayList<>(); /** * Sets the minimum number of successive lines of commented code before reporting @@ -30,6 +73,31 @@ public void setMin(int min) { this.min = min; } + /** + * Sets the minimum confidence a selection of comments should be before reporting + * + * @param minConfidence minimum proportion + */ + public void setMinConfidence(double minConfidence) { + this.minConfidence = minConfidence; + } + + public void setEntireCommentWeight(int entireCommentWeight) { + this.entireCommentWeight = entireCommentWeight; + } + + public void setReservedWordWeight(int reservedWordWeight) { + this.reservedWordWeight = reservedWordWeight; + } + + public void setSingleLineWeight(int singleLineWeight) { + this.singleLineWeight = singleLineWeight; + } + + public void setMinWords(int minWords) { + this.minWords = minWords; + } + @Override public int[] getDefaultTokens() { return getRequiredTokens(); @@ -50,60 +118,106 @@ public boolean isCommentNodesRequired() { return true; } - @Override - public void beginTree(DetailAST rootAST) { - resetCounters(); - } - @Override public void finishTree(DetailAST rootAST) { - checkLines(); + checkCommentList(); } @Override public void visitToken(DetailAST ast) { - String[] split = ast.getText().split("\n"); - int lineNum = ast.getLineNo(); - for(int i = 0; i < split.length; i++) { - String line = split[i].trim(); + CommentNode commentNode = new CommentNode(ast, Arrays.stream(ast.getText().trim().split("\r?\n")).map(String::trim).toList()); - if(line.isBlank()) { - continue; - } - - if(CODE_LINE_END_CHARS.contains(line.charAt(line.length() - 1))) { - handleCommentedLine(lineNum + i); - } - else { - checkLines(); - resetCounters(); - } + if(!commentNodes.isEmpty() && ast.getLineNo() > commentNodes.getLast().getLastLineNo() + 1) { + checkCommentList(); } + + commentNodes.add(commentNode); } - private void handleCommentedLine(int lineNum) { - if(firstGroupCommentedLine + numSuccessiveLines < lineNum) { - checkLines(); - resetCounters(); + private void checkCommentList() { + List commentLines = new ArrayList<>(); + commentNodes.forEach(node -> commentLines.addAll(node.lines())); + + if (commentLines.stream().filter(line -> !line.isBlank()).count() < min) { + commentNodes.clear(); + return; } - if(numSuccessiveLines == 0) { - firstGroupCommentedLine = lineNum; - numSuccessiveLines = 1; + String entireComment = String.join("\n", commentLines); + double entireCommentCodeScore = maxMatcher(entireComment, ENTIRE_COMMENT_CODE_PATTERNS); + double entireCommentNonCodeScore = maxMatcher(entireComment, ENTIRE_COMMENT_NONCODE_PATTERNS); + double entireCommentScore = (entireCommentCodeScore - entireCommentNonCodeScore) * entireCommentWeight; + + double singleLineCodeScore = maxMatcher(commentLines, SINGLE_LINE_CODE_PATTERNS); + double singleLineNonCodeScore = maxMatcher(commentLines, SINGLE_LINE_NONCODE_PATTERNS); + double singleLineScore = (singleLineCodeScore - singleLineNonCodeScore) * singleLineWeight; + + String[] allWords = entireComment.trim().split("\\W+"); + double reservedWordScore = 0; + if(Arrays.stream(allWords).filter(word -> !word.isBlank()).count() > minWords) { + long reservedWordCount = Arrays.stream(allWords).filter(JAVA_RESERVED_WORDS::contains).count(); + reservedWordScore = Math.min(3.0 * reservedWordCount / allWords.length, 1) * reservedWordWeight; + } + + double totalConfidence = getTotalConfidence(reservedWordScore, entireCommentScore, singleLineScore); + if (totalConfidence >= minConfidence) { + log(commentNodes.getFirst().ast(), String.format("%d comment lines likely containing commented code (%d%% confidence)", + commentNodes.getLast().getLastLineNo() + 1 - commentNodes.getFirst().ast().getLineNo(), + Math.round(totalConfidence * 100))); + } + + commentNodes.clear(); + } + + private double getTotalConfidence(double reservedWordScore, double entireCommentScore, double singleLineScore) { + int totalWeight = 0; + double totalScore = 0; + if(reservedWordScore != 0) { + totalWeight += reservedWordWeight; + totalScore += reservedWordScore; } - else { - numSuccessiveLines++; + if(entireCommentScore != 0) { + totalWeight += entireCommentWeight; + totalScore += entireCommentScore; } + if(singleLineScore != 0) { + totalWeight += singleLineWeight; + totalScore += singleLineScore; + } + + return totalWeight == 0 ? 0 : totalScore / totalWeight; + } + + private double maxMatcher(String match, Map patternScores) { + double max = 0; + for(var entry : patternScores.entrySet()) { + if(entry.getValue() > max && entry.getKey().matcher(match).matches()) { + max = entry.getValue(); + } + } + return max; } - private void resetCounters() { - firstGroupCommentedLine = Integer.MIN_VALUE; - numSuccessiveLines = 0; + private double maxMatcher(List matches, Map patternScores) { + if(matches.isEmpty()) { + return 0; + } + + double totalScore = 0; + int totalNonBlankLines = 0; + + for(String match : matches) { + if(!match.isBlank()) { + totalScore += maxMatcher(match, patternScores); + totalNonBlankLines++; + } + } + return totalNonBlankLines == 0 ? 0 : totalScore / totalNonBlankLines; } - private void checkLines() { - if(numSuccessiveLines >= min) { - log(firstGroupCommentedLine, String.format("%d lines of commented out code", numSuccessiveLines)); + private record CommentNode(DetailAST ast, List lines) { + int getLastLineNo() { + return ast.getLineNo() + lines.size() - 1; } } } diff --git a/src/test/java/edu/byu/cs240/checkstyle/readability/CommentedCodeTest.java b/src/test/java/edu/byu/cs240/checkstyle/readability/CommentedCodeTest.java index 3620d18..883746f 100644 --- a/src/test/java/edu/byu/cs240/checkstyle/readability/CommentedCodeTest.java +++ b/src/test/java/edu/byu/cs240/checkstyle/readability/CommentedCodeTest.java @@ -60,12 +60,19 @@ public void should_FindNoErrors_when_commentInString() throws CheckstyleExceptio testFiles(0, fileName); } + @Test + @DisplayName("Should find no errors in code that has commented code below the min threshold") + public void should_FindNoErrors_when_belowMinThreshold() throws CheckstyleException { + String fileName = "testInputs/commentedCode/should_FindNoErrors_when_belowMinThreshold.java"; + testFiles(0, fileName); + } + @Test @DisplayName("Should find errors in code that has code comments") public void should_FindErrors_when_CodeComments() throws CheckstyleException { String fileName = "testInputs/commentedCode/should_FindErrors_when_CodeComments.java"; - testFiles(2, fileName); + testFiles(3, fileName); } diff --git a/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindErrors_when_CodeComments.java b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindErrors_when_CodeComments.java index 4beab6e..776b323 100644 --- a/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindErrors_when_CodeComments.java +++ b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindErrors_when_CodeComments.java @@ -15,5 +15,33 @@ public void hi() { // int i = 2477; // int j = 1734; // System.out.println(i + j); + System.out.println("Hello World!"); + int i = 1234; + int j = 4321; + System.out.println(i + j); + + +// +// +// System.out.println("Hello World!"); +// +// +// +// int i = 7; +// +// +// +// +// int j = 4; +// +// +// +// +// System.out.println(i + j); +// +// + + + } } \ No newline at end of file diff --git a/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_OtherComments.java b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_OtherComments.java index 8f6d9da..cec8181 100644 --- a/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_OtherComments.java +++ b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_OtherComments.java @@ -9,4 +9,23 @@ public void hi() { int j = 4; System.out.println(i + j); } + + /** + * Lol idk seemed like a fun time; + * Javadoc comments can be weird sometimes; + * Documentation is great to have; + * but seems like most devs don't like writing it; + * + * @param a yes (extra details) + * @param b no (but is it actually?) + * @param c sometimes, but semicolons; + * @param d occasionally, + * @param e often ( + * @param f almost always / + * @param g literally never } + * @param h the only one actually used lol { + */ + public void hi(int a, int b, Object c, char d, boolean e, long f, Void g, String h) { + System.out.println(h + " lol!"); + } } \ No newline at end of file diff --git a/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_belowMinThreshold.java b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_belowMinThreshold.java new file mode 100644 index 0000000..d4e62ce --- /dev/null +++ b/src/test/resources/edu/byu/cs240/checkstyle/readability/testInputs/commentedCode/should_FindNoErrors_when_belowMinThreshold.java @@ -0,0 +1,20 @@ +import java.util.Scanner; + +class should_FindNoErrors_when_belowMinThreshold { + public void hi() { + +// System.out.println("Hello World!"); + + + System.out.println("Hello World!"); + + +// +// +// System.out.println("Shouldn't count blank lines towards total"); +// System.out.println("Hello World!"); +// +// + + } +}