Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.byu.cs240</groupId>
<artifactId>checkstyle</artifactId>
<version>1.0.8</version>
<version>1.0.9</version>
<name>C S 240 Checkstyle Checks</name>
<properties>
<maven.compiler.source>21</maven.compiler.source>
Expand Down
194 changes: 154 additions & 40 deletions src/main/java/edu/byu/cs240/checkstyle/readability/CommentedCode.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.TokenTypes;

import java.util.Set;
import java.util.*;
import java.util.regex.Pattern;

/**
* Reports successive lines of commented out code
Expand All @@ -13,13 +14,55 @@
*/
public class CommentedCode extends AbstractCheck {

private static final Set<Character> CODE_LINE_END_CHARS = Set.of(';', ',', '{', '}', '(', ')', '/');
private static final Map<Pattern, Double> ENTIRE_COMMENT_CODE_PATTERNS = new HashMap<>();
private static final Map<Pattern, Double> SINGLE_LINE_CODE_PATTERNS = new HashMap<>();
private static final Map<Pattern, Double> ENTIRE_COMMENT_NONCODE_PATTERNS = new HashMap<>();
private static final Map<Pattern, Double> SINGLE_LINE_NONCODE_PATTERNS = new HashMap<>();

private static final Set<String> JAVA_RESERVED_WORDS =
Set.of("abstract", "continue", "for", "new", "switch", "assert", "default", "package", "synchronized",
"boolean", "do", "if", "private", "this", "break", "double", "implements", "protected", "throw",
"byte", "else", "import", "public", "throws", "case", "enum", "instanceof", "return", "transient",
"catch", "extends", "int", "short", "try", "char", "final", "interface", "static", "void", "class",
"finally", "long", "strictfp", "volatile", "const", "float", "native", "super", "while");

static {
ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("[\\s\\S]*(?:if|for|while)\\s*\\([\\s\\S]+\\)\\s*\\{[\\s\\S]*}[\\s\\S]*"), 0.9); //simple if/for/while
ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("[\\s\\S]*if\\s*\\([\\s\\S]+\\)\\s*\\{[\\s\\S]*}\\s*else(?:\\sif\\s*\\([\\s\\S]+\\))?\\s*\\{[\\s\\S]*}[\\s\\S]*"), 0.95); //if/else or if/elseif
ENTIRE_COMMENT_CODE_PATTERNS.put(Pattern.compile("^(?:[\\s\\S]*\\n\\s*)?(?:private|public|protected)?\\s+\\w+\\s+\\w+\\s*\\([\\s\\S]*\\)\\s*\\{[\\s\\S]*}[\\s\\S]*$"), 0.9); //method declaration

SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^(?:if|while)\\s*\\([\\s\\S]+\\)\\s*\\{?\\s$"), 0.85); //simple if/while
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^for\\s*\\([\\s\\S]*;[\\s\\S]*;[\\s\\S]*\\)\\s*\\{?\\s$"), 0.9); //start of for loop
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+\\s+\\w+\\s*=[\\s\\S]*$"), 0.9); //variable declaration
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+\\s*(?:\\+|-|\\*|/|%|&|\\^|\\||<<|>>|>>>)?=[\\s\\S]*$"), 0.85); //variable assignment
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^(?:\\w+(?:\\(.*\\))?\\.)*\\w+\\(.*\\);$"), 0.85); //method call
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^return\\s+.*;$"), 0.9); // return statements
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^\\w+(?:--|\\+\\+);?$"), 0.9); //increment/decrement
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^.*;$"), 0.8); //end semicolon
SINGLE_LINE_CODE_PATTERNS.put(Pattern.compile("^}$"), 0.6); //only end curly


ENTIRE_COMMENT_NONCODE_PATTERNS.put(Pattern.compile("^(?:\\*.*\n)*\\*.*?$"), 0.85); //Javadoc comment
ENTIRE_COMMENT_NONCODE_PATTERNS.put(Pattern.compile("^\\{(?:\\s*\"\\w+\":\\s*(?:\"?\\w+\"?|\"\"|\\[[\\s\\S]*]|\\{[\\s\\S]*}),?\\s*)+}$"), 0.75); //JSON object

SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\\*.*$"), 0.65); //start of javadoc comment line
SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\\*\\s+@(?:param|return|throws)\\s+.*$"), 0.8); //start of javadoc comment line
SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^\"\\w+\":\\s*(?:\"?[\\w: ]+\"?|\"\"),?$"), 0.65); //individual line of json field
SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^(?i)(?:TODO|FIXME):.*$"), 0.8);
SINGLE_LINE_NONCODE_PATTERNS.put(Pattern.compile("^NOTE:.*$"), 0.7);
}
Comment on lines +29 to +53
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Maillman Are there any other code patterns or common non-code patterns you think we should be checking for?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is robust enough, and already significantly better than the previous commented code check. I don't think there's anything else I would add.


private int min = 5;

private int firstGroupCommentedLine;
private double minConfidence = 0.5;

private int entireCommentWeight = 15;
private int singleLineWeight = 7;
private int reservedWordWeight = 2;

private int minWords = 10;

private int numSuccessiveLines;
private final List<CommentNode> commentNodes = new ArrayList<>();

/**
* Sets the minimum number of successive lines of commented code before reporting
Expand All @@ -30,6 +73,31 @@ public void setMin(int min) {
this.min = min;
}

/**
* Sets the minimum confidence a selection of comments should be before reporting
*
* @param minConfidence minimum proportion
*/
public void setMinConfidence(double minConfidence) {
this.minConfidence = minConfidence;
}

public void setEntireCommentWeight(int entireCommentWeight) {
this.entireCommentWeight = entireCommentWeight;
}

public void setReservedWordWeight(int reservedWordWeight) {
this.reservedWordWeight = reservedWordWeight;
}

public void setSingleLineWeight(int singleLineWeight) {
this.singleLineWeight = singleLineWeight;
}

public void setMinWords(int minWords) {
this.minWords = minWords;
}

@Override
public int[] getDefaultTokens() {
return getRequiredTokens();
Expand All @@ -50,60 +118,106 @@ public boolean isCommentNodesRequired() {
return true;
}

@Override
public void beginTree(DetailAST rootAST) {
resetCounters();
}

@Override
public void finishTree(DetailAST rootAST) {
checkLines();
checkCommentList();
}

@Override
public void visitToken(DetailAST ast) {
String[] split = ast.getText().split("\n");
int lineNum = ast.getLineNo();
for(int i = 0; i < split.length; i++) {
String line = split[i].trim();
CommentNode commentNode = new CommentNode(ast, Arrays.stream(ast.getText().trim().split("\r?\n")).map(String::trim).toList());

if(line.isBlank()) {
continue;
}

if(CODE_LINE_END_CHARS.contains(line.charAt(line.length() - 1))) {
handleCommentedLine(lineNum + i);
}
else {
checkLines();
resetCounters();
}
if(!commentNodes.isEmpty() && ast.getLineNo() > commentNodes.getLast().getLastLineNo() + 1) {
checkCommentList();
}

commentNodes.add(commentNode);
}

private void handleCommentedLine(int lineNum) {
if(firstGroupCommentedLine + numSuccessiveLines < lineNum) {
checkLines();
resetCounters();
private void checkCommentList() {
List<String> commentLines = new ArrayList<>();
commentNodes.forEach(node -> commentLines.addAll(node.lines()));

if (commentLines.stream().filter(line -> !line.isBlank()).count() < min) {
commentNodes.clear();
return;
}

if(numSuccessiveLines == 0) {
firstGroupCommentedLine = lineNum;
numSuccessiveLines = 1;
String entireComment = String.join("\n", commentLines);
double entireCommentCodeScore = maxMatcher(entireComment, ENTIRE_COMMENT_CODE_PATTERNS);
double entireCommentNonCodeScore = maxMatcher(entireComment, ENTIRE_COMMENT_NONCODE_PATTERNS);
double entireCommentScore = (entireCommentCodeScore - entireCommentNonCodeScore) * entireCommentWeight;

double singleLineCodeScore = maxMatcher(commentLines, SINGLE_LINE_CODE_PATTERNS);
double singleLineNonCodeScore = maxMatcher(commentLines, SINGLE_LINE_NONCODE_PATTERNS);
double singleLineScore = (singleLineCodeScore - singleLineNonCodeScore) * singleLineWeight;

String[] allWords = entireComment.trim().split("\\W+");
double reservedWordScore = 0;
if(Arrays.stream(allWords).filter(word -> !word.isBlank()).count() > minWords) {
long reservedWordCount = Arrays.stream(allWords).filter(JAVA_RESERVED_WORDS::contains).count();
reservedWordScore = Math.min(3.0 * reservedWordCount / allWords.length, 1) * reservedWordWeight;
}

double totalConfidence = getTotalConfidence(reservedWordScore, entireCommentScore, singleLineScore);
if (totalConfidence >= minConfidence) {
log(commentNodes.getFirst().ast(), String.format("%d comment lines likely containing commented code (%d%% confidence)",
commentNodes.getLast().getLastLineNo() + 1 - commentNodes.getFirst().ast().getLineNo(),
Math.round(totalConfidence * 100)));
}

commentNodes.clear();
}

private double getTotalConfidence(double reservedWordScore, double entireCommentScore, double singleLineScore) {
int totalWeight = 0;
double totalScore = 0;
if(reservedWordScore != 0) {
totalWeight += reservedWordWeight;
totalScore += reservedWordScore;
}
else {
numSuccessiveLines++;
if(entireCommentScore != 0) {
totalWeight += entireCommentWeight;
totalScore += entireCommentScore;
}
if(singleLineScore != 0) {
totalWeight += singleLineWeight;
totalScore += singleLineScore;
}

return totalWeight == 0 ? 0 : totalScore / totalWeight;
}

private double maxMatcher(String match, Map<Pattern, Double> patternScores) {
double max = 0;
for(var entry : patternScores.entrySet()) {
if(entry.getValue() > max && entry.getKey().matcher(match).matches()) {
max = entry.getValue();
}
}
return max;
}

private void resetCounters() {
firstGroupCommentedLine = Integer.MIN_VALUE;
numSuccessiveLines = 0;
private double maxMatcher(List<String> matches, Map<Pattern, Double> patternScores) {
if(matches.isEmpty()) {
return 0;
}

double totalScore = 0;
int totalNonBlankLines = 0;

for(String match : matches) {
if(!match.isBlank()) {
totalScore += maxMatcher(match, patternScores);
totalNonBlankLines++;
}
}
return totalNonBlankLines == 0 ? 0 : totalScore / totalNonBlankLines;
}

private void checkLines() {
if(numSuccessiveLines >= min) {
log(firstGroupCommentedLine, String.format("%d lines of commented out code", numSuccessiveLines));
private record CommentNode(DetailAST ast, List<String> lines) {
int getLastLineNo() {
return ast.getLineNo() + lines.size() - 1;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,19 @@ public void should_FindNoErrors_when_commentInString() throws CheckstyleExceptio
testFiles(0, fileName);
}

@Test
@DisplayName("Should find no errors in code that has commented code below the min threshold")
public void should_FindNoErrors_when_belowMinThreshold() throws CheckstyleException {
String fileName = "testInputs/commentedCode/should_FindNoErrors_when_belowMinThreshold.java";
testFiles(0, fileName);
}


@Test
@DisplayName("Should find errors in code that has code comments")
public void should_FindErrors_when_CodeComments() throws CheckstyleException {
String fileName = "testInputs/commentedCode/should_FindErrors_when_CodeComments.java";
testFiles(2, fileName);
testFiles(3, fileName);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,33 @@ public void hi() {
// int i = 2477;
// int j = 1734;
// System.out.println(i + j);
System.out.println("Hello World!");
int i = 1234;
int j = 4321;
System.out.println(i + j);


//
//
// System.out.println("Hello World!");
//
//
//
// int i = 7;
//
//
//
//
// int j = 4;
//
//
//
//
// System.out.println(i + j);
//
//



}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,23 @@ public void hi() {
int j = 4;
System.out.println(i + j);
}

/**
* Lol idk seemed like a fun time;
* Javadoc comments can be weird sometimes;
* Documentation is great to have;
* but seems like most devs don't like writing it;
*
* @param a yes (extra details)
* @param b no (but is it actually?)
* @param c sometimes, but semicolons;
* @param d occasionally,
* @param e often (
* @param f almost always /
* @param g literally never }
* @param h the only one actually used lol {
*/
public void hi(int a, int b, Object c, char d, boolean e, long f, Void g, String h) {
System.out.println(h + " lol!");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import java.util.Scanner;

class should_FindNoErrors_when_belowMinThreshold {
public void hi() {

// System.out.println("Hello World!");


System.out.println("Hello World!");


//
//
// System.out.println("Shouldn't count blank lines towards total");
// System.out.println("Hello World!");
//
//

}
}