From e22de5ab22ecad28949fc3c3d94a9b0fa650dcf7 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 11:26:53 -0800 Subject: [PATCH 01/11] Moved file loading into main() and added CL input for missing credentials (part 1) --- src/MainParser.java | 71 +++++++++++++++++++++++++++++++++++++++++- src/config/Config.java | 63 +++++++++++++++++++++---------------- 2 files changed, 107 insertions(+), 27 deletions(-) diff --git a/src/MainParser.java b/src/MainParser.java index c583b75..fb6d20f 100644 --- a/src/MainParser.java +++ b/src/MainParser.java @@ -1,5 +1,16 @@ +import config.Config; +import org.apache.commons.configuration2.Configuration; +import org.apache.commons.configuration2.FileBasedConfiguration; +import org.apache.commons.configuration2.PropertiesConfiguration; +import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder; +import org.apache.commons.configuration2.builder.fluent.Parameters; +import org.apache.commons.configuration2.ex.ConfigurationException; import org.json.JSONException; +import java.util.HashMap; +import java.util.Map; +import java.util.Scanner; + public class MainParser { // public static final String gitURL = "https://github.com/janani-sridhar/CaesarCipher"; // public static final String directory = "/Users/jananisridhar/Desktop/CC"; @@ -7,10 +18,68 @@ public class MainParser { // // public static FileParser fp; // public static JSONifySummary summary; - + + + public static void testTermination(String s) { + if(s.equals("q")) { + System.exit(0); + } + } + public static void main(String[] args) throws JSONException { //summary = new JSONifySummary(); //fp = new FileParser(summary); + Scanner reader = new Scanner(System.in); + + Map configMap = new HashMap<>(); + configMap.put("authToken", null); + configMap.put("mongoUrl", null); + configMap.put("mongoUsername", null); + configMap.put("mongoPassword", null); + configMap.put("mongoDatabase", null); + configMap.put("mongoCollection", null); + configMap.put("tempJavaFilePath", null); + configMap.put("tempJSONFilePath", null); + configMap.put("repoURLsPath", null); + + Parameters params = new Parameters(); + FileBasedConfigurationBuilder builder = + new FileBasedConfigurationBuilder(PropertiesConfiguration.class) + .configure(params.properties().setFileName("javaAnalysis.properties")); + try { + Configuration config = builder.getConfiguration(); + + configMap.put("tempJavaFilePath", config.getString("tempJavaFilePath").replaceAll("\"", "")); + configMap.put("tempJSONFilePath", config.getString("tempJSONFilePath").replaceAll("\"", "")); + configMap.put("authToken", config.getString("authToken").replaceAll("\"", "")); + configMap.put("repoURLsPath", config.getString("repoURLsPath").replaceAll("\"", "")); + configMap.put("mongoUsername", config.getString("mongoUsername")); + configMap.put("mongoPassword", config.getString("mongoPassword")); + configMap.put("mongoUrl", config.getString("mongoUrl")); + configMap.put("mongoDatabase", config.getString("mongoDatabase")); + configMap.put("mongoCollection", config.getString("mongoCollection")); + } + catch(ConfigurationException e) { + System.out.println("No javaAnalysis.properties found...\n"); + //e.printStackTrace(); + } + + for (Map.Entry entry : configMap.entrySet()) { + String key = entry.getKey(); + + while (configMap.get(key) == null || configMap.get(key).isEmpty()) { + System.out.print("Missing " + key + ". Enter one or [q] to quit: "); + String s = reader.nextLine().trim(); + System.out.print("\n"); + testTermination(s.toLowerCase()); + configMap.put(key, s); + } + } + + Config.init(configMap.get("authToken"), configMap.get("mongoUsername"), configMap.get("mongoPassword"), + configMap.get("mongoUrl"), configMap.get("mongoDatabase"), configMap.get("mongoCollection"), + configMap.get("tempJavaFilePath"), configMap.get("tempJSONFilePath"), configMap.get("repoURLsPath")); + RepoTraversal traverser = new RepoTraversal(); traverser.findJavaFilesToParse(); diff --git a/src/config/Config.java b/src/config/Config.java index 199f8fc..81e5746 100644 --- a/src/config/Config.java +++ b/src/config/Config.java @@ -1,31 +1,42 @@ package config; -import org.apache.commons.configuration2.Configuration; -import org.apache.commons.configuration2.FileBasedConfiguration; -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder; -import org.apache.commons.configuration2.builder.fluent.Parameters; -import org.apache.commons.configuration2.ex.ConfigurationException; /** - * In order to use this class, create a javaAnalysis.properties file in the root directory of the project. + * In order to use this class, create a javaAnalysis.properties file in the root directory of the project or manually + * input these as they are found missing during the runtime. * Check the README for everything you will need to define in here. * Do not push the .properties file to the repo, as it holds private information. */ + public class Config { // static instance private static Config instance; // private info - private String authToken; - private String mongoUrl; - private String mongoUsername; - private String mongoPassword; - private String mongoDatabase; - private String mongoCollection; + private static String authToken; + private static String mongoUrl; + private static String mongoUsername; + private static String mongoPassword; + private static String mongoDatabase; + private static String mongoCollection; // non private info - private String tempJavaFilePath; - private String tempJSONFilePath; - private String repoURLsPath; + private static String tempJavaFilePath; + private static String tempJSONFilePath; + private static String repoURLsPath; + + public static void init(String newAuthToken, String newMongoUsername, String newMongoPassword, String newMongoUrl, + String newMongoDatabase, String newMongoCollection, String newTempJavaFilePath, + String newTempJSONFilePath, String newRepoURLsPath) { + authToken = newAuthToken; + mongoUsername = newMongoUsername; + mongoPassword = newMongoPassword; + mongoUrl = newMongoUrl; + mongoDatabase = newMongoDatabase; + mongoCollection = newMongoCollection; + tempJavaFilePath = newTempJavaFilePath; + tempJSONFilePath = newTempJSONFilePath; + repoURLsPath = newRepoURLsPath; + } + /* public static Config getInstance() { if(instance == null) { instance = new Config(); @@ -44,7 +55,6 @@ private Config() { tempJSONFilePath = config.getString("tempJSONFilePath").replaceAll("\"", ""); authToken = config.getString("authToken").replaceAll("\"", ""); repoURLsPath = config.getString("repoURLsPath").replaceAll("\"", ""); - mongoUrl = config.getString("mongoUrl"); mongoUsername = config.getString("mongoUsername"); mongoPassword = config.getString("mongoPassword"); mongoUrl = config.getString("mongoUrl"); @@ -55,40 +65,41 @@ private Config() { e.printStackTrace(); } } + */ - public String getTempJavaFilePath() { + public static String getTempJavaFilePath() { return tempJavaFilePath; } - public String getAuthToken() { + public static String getAuthToken() { return authToken; } - public String getRepoURLsPath() { + public static String getRepoURLsPath() { return repoURLsPath; } - public String getTempJSONFilePath() { + public static String getTempJSONFilePath() { return tempJSONFilePath; } - public String getMongoUrl() { + public static String getMongoUrl() { return mongoUrl; } - public String getMongoUsername() { + public static String getMongoUsername() { return mongoUsername; } - public String getMongoPassword() { + public static String getMongoPassword() { return mongoPassword; } - public String getMongoDatabase() { + public static String getMongoDatabase() { return mongoDatabase; } - public String getMongoCollection() { + public static String getMongoCollection() { return mongoCollection; } } From b1d0e63bf354ad103b1d2fed32adb1effd130718 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 11:27:49 -0800 Subject: [PATCH 02/11] Updated classes that use the config singleton to now use the static implementation (part 2) --- src/FileParser.java | 16 +++++----------- src/JSONify.java | 2 +- src/RepoTraversal.java | 22 +++++++++------------- src/mongo/MongoCollectionClient.java | 11 +++++------ 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/FileParser.java b/src/FileParser.java index c9e0f51..dac17fb 100644 --- a/src/FileParser.java +++ b/src/FileParser.java @@ -1,13 +1,4 @@ -import java.io.*; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Optional; - -import com.github.javaparser.JavaParser; import com.github.javaparser.ParseProblemException; -import com.github.javaparser.ParserConfiguration; import com.github.javaparser.StaticJavaParser; import com.github.javaparser.ast.CompilationUnit; import com.github.javaparser.ast.ImportDeclaration; @@ -19,6 +10,10 @@ import config.Config; import org.json.JSONObject; +import java.io.File; +import java.io.FileInputStream; +import java.util.*; + public class FileParser { // Parser objects -- for calling methods in other classes private ParsingHelper ph = new ParsingHelper(); @@ -38,8 +33,7 @@ public class FileParser { protected ArrayList methodWPs = new ArrayList<>(); protected NameResults nr = new NameResults(); - private static final Config config = Config.getInstance(); - private static final String tempFilePath = config.getTempJavaFilePath(); + private static final String tempFilePath = Config.getTempJavaFilePath(); private JSONify jsonify; diff --git a/src/JSONify.java b/src/JSONify.java index 07d93c8..ddc4f09 100644 --- a/src/JSONify.java +++ b/src/JSONify.java @@ -7,7 +7,7 @@ public class JSONify { private FileParser fp; - private String tempJSONFilePath = Config.getInstance().getTempJSONFilePath(); + private String tempJSONFilePath = Config.getTempJSONFilePath(); public JSONify(FileParser fp) { this.fp = fp; diff --git a/src/RepoTraversal.java b/src/RepoTraversal.java index 4b9de72..a3cff55 100644 --- a/src/RepoTraversal.java +++ b/src/RepoTraversal.java @@ -1,24 +1,20 @@ -import java.io.*; - -import com.mongodb.DBCursor; import com.mongodb.client.FindIterable; import config.Config; import mongo.MongoCollectionClient; +import org.apache.commons.codec.binary.Base64; import org.bson.Document; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; -import java.util.ArrayList; -import java.net.HttpURLConnection; -import java.net.URL; -import org.apache.commons.codec.binary.Base64; import util.UrlFilepathPair; -import javax.print.Doc; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; public class RepoTraversal { - private static final Config config = Config.getInstance(); - private static final String tempFilePath = config.getTempJavaFilePath(); + private static final String tempFilePath = Config.getTempJavaFilePath(); private long numFiles; private long numJavaFiles; @@ -44,7 +40,7 @@ public void findJavaFilesToParse() { private void markFileAsDone(String repoName, ArrayList repoURLS) { ArrayList lines = new ArrayList(); try { - String repoListPath = config.getRepoURLsPath(); + String repoListPath = Config.getRepoURLsPath(); BufferedReader br = new BufferedReader(new FileReader(repoListPath)); String line; while ((line = br.readLine()) != null) { @@ -226,7 +222,7 @@ private JSONObject recurseForJSONObject(JSONObject source, String[] keys) { private JSONObject makeGetRequest(String urlString) throws CustomException { try { - String authToken = config.getAuthToken(); + String authToken = Config.getAuthToken(); URL url = new URL(urlString); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestProperty("Authorization", "Bearer " + authToken); @@ -264,7 +260,7 @@ private String getDefaultBranch(String url) { } private ArrayList getRepoURLsFromConfig() { - String repoURLsPath = config.getRepoURLsPath(); + String repoURLsPath = Config.getRepoURLsPath(); ArrayList urls = new ArrayList<>(); try{ BufferedReader br = new BufferedReader(new FileReader(repoURLsPath)); diff --git a/src/mongo/MongoCollectionClient.java b/src/mongo/MongoCollectionClient.java index 6c8af9e..cf03a42 100644 --- a/src/mongo/MongoCollectionClient.java +++ b/src/mongo/MongoCollectionClient.java @@ -25,12 +25,11 @@ public class MongoCollectionClient { private static final Integer port = 27017; private MongoCollectionClient() { - Config mdbConfig = Config.getInstance(); - String database = mdbConfig.getMongoDatabase(); - String username = mdbConfig.getMongoUsername(); - String password = mdbConfig.getMongoPassword(); - String url = mdbConfig.getMongoUrl(); - String collection = mdbConfig.getMongoCollection(); + String database = Config.getMongoDatabase(); + String username = Config.getMongoUsername(); + String password = Config.getMongoPassword(); + String url = Config.getMongoUrl(); + String collection = Config.getMongoCollection(); try { MongoClientURI uri = new MongoClientURI( "mongodb+srv://"+username+":"+password+"@"+url+"/test?retryWrites=true"); From 86700ee537d85fae2f0876a34d7e926c3e405fd2 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 14:08:49 -0800 Subject: [PATCH 03/11] Added the ability to fetch repo names dynamically (unlike python branch, there prev no way to do this). Gets only by stars right now --- src/GetRepoNames.java | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/GetRepoNames.java diff --git a/src/GetRepoNames.java b/src/GetRepoNames.java new file mode 100644 index 0000000..82edc61 --- /dev/null +++ b/src/GetRepoNames.java @@ -0,0 +1,38 @@ +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import java.util.ArrayList; + +public class GetRepoNames { + +// public ArrayList getReposByKeywords (int resultLimit, String [] keywords) { +// ArrayList results = new ArrayList(); +// +// return results; +// } + + public ArrayList getReposByStars (int resultLimit) { + ArrayList results = new ArrayList(); + RepoTraversal util = new RepoTraversal(); //The makeGetRequest() should be moved into its own class to avoid implementation here + + try { + JSONObject result = util.makeGetRequest("https://api.github.com/search/repositories?" + + "q=language:java&sort=stars&order=desc&per_page=" + resultLimit + "&page=1"); + JSONArray jsonArray = result.getJSONArray("items"); + for (int i = 0, size = jsonArray.length(); i < size; i++) { + JSONObject objectInArray = jsonArray.getJSONObject(i); + String r = "https://api.github.com/repos/" + objectInArray.getString("full_name"); + r += util.getDefaultBranch(r); + results.add(r); + } + } catch (CustomException e) { + e.printStackTrace(); + } catch (JSONException e) { + e.printStackTrace(); + } + + return results; + } +} + From 92b1108d2bac5865f7a846d763cefbca00d46221 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 14:10:36 -0800 Subject: [PATCH 04/11] Made changes to other classes so user can dynamically generate repo names and limit the number of repo results --- src/MainParser.java | 55 ++++++++++++++++++++++++++++++++++++------ src/RepoTraversal.java | 24 ++++++++++++------ 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/MainParser.java b/src/MainParser.java index fb6d20f..96084c4 100644 --- a/src/MainParser.java +++ b/src/MainParser.java @@ -29,6 +29,7 @@ public static void testTermination(String s) { public static void main(String[] args) throws JSONException { //summary = new JSONifySummary(); //fp = new FileParser(summary); + RepoTraversal traverser = new RepoTraversal(); Scanner reader = new Scanner(System.in); Map configMap = new HashMap<>(); @@ -48,11 +49,17 @@ public static void main(String[] args) throws JSONException { .configure(params.properties().setFileName("javaAnalysis.properties")); try { Configuration config = builder.getConfiguration(); + String temp; + //Properties not found will remain null + temp = config.getString("tempJavaFilePath"); + configMap.put("tempJavaFilePath", (temp !=null)? temp.replaceAll("\"", "") : null); + temp = config.getString("tempJSONFilePath"); + configMap.put("tempJSONFilePath", (temp !=null)? temp.replaceAll("\"", "") : null); + temp = config.getString("authToken"); + configMap.put("authToken", (temp !=null)? temp.replaceAll("\"", "") : null); + temp = config.getString("repoURLsPath"); + configMap.put("repoURLsPath", (temp !=null)? temp.replaceAll("\"", "") : null); - configMap.put("tempJavaFilePath", config.getString("tempJavaFilePath").replaceAll("\"", "")); - configMap.put("tempJSONFilePath", config.getString("tempJSONFilePath").replaceAll("\"", "")); - configMap.put("authToken", config.getString("authToken").replaceAll("\"", "")); - configMap.put("repoURLsPath", config.getString("repoURLsPath").replaceAll("\"", "")); configMap.put("mongoUsername", config.getString("mongoUsername")); configMap.put("mongoPassword", config.getString("mongoPassword")); configMap.put("mongoUrl", config.getString("mongoUrl")); @@ -64,9 +71,14 @@ public static void main(String[] args) throws JSONException { //e.printStackTrace(); } + //Ask for missing creds via the CL (exception for filename) for (Map.Entry entry : configMap.entrySet()) { String key = entry.getKey(); + if (key.equals("repoURLsPath")) { + continue; + } + while (configMap.get(key) == null || configMap.get(key).isEmpty()) { System.out.print("Missing " + key + ". Enter one or [q] to quit: "); String s = reader.nextLine().trim(); @@ -76,13 +88,42 @@ public static void main(String[] args) throws JSONException { } } + String inputType = ""; + Integer limitRepos = null; + while (!inputType.equals("f") && !inputType.equals("g")) { + System.out.print("Do you want to get repo names from file [f] or generate them [g]. Enter one or [q] to quit: "); + inputType = reader.nextLine().trim(); + System.out.print("\n"); + testTermination(inputType.toLowerCase()); + } + + if (inputType.equals("f")) { + while (configMap.get("repoURLsPath") == null || configMap.get("repoURLsPath").isEmpty()) { + System.out.print("Missing repoURLsPath. Enter one or [q] to quit: "); + String s = reader.nextLine().trim(); + System.out.print("\n"); + testTermination(s.toLowerCase()); + configMap.put("repoURLsPath", s); + } + } else { + while (limitRepos == null) { + System.out.print("How many repos do you want to fetch? Enter one or [q] to quit: "); + String s = reader.nextLine().trim(); + System.out.print("\n"); + testTermination(s.toLowerCase()); + try { + limitRepos = Integer.parseInt(s); + } catch (NumberFormatException e) { + continue; + } + } + } + Config.init(configMap.get("authToken"), configMap.get("mongoUsername"), configMap.get("mongoPassword"), configMap.get("mongoUrl"), configMap.get("mongoDatabase"), configMap.get("mongoCollection"), configMap.get("tempJavaFilePath"), configMap.get("tempJSONFilePath"), configMap.get("repoURLsPath")); - RepoTraversal traverser = new RepoTraversal(); - - traverser.findJavaFilesToParse(); + traverser.findJavaFilesToParse(inputType, limitRepos); } /*public static void getGitFiles(String url, String directory) { diff --git a/src/RepoTraversal.java b/src/RepoTraversal.java index a3cff55..ed82a77 100644 --- a/src/RepoTraversal.java +++ b/src/RepoTraversal.java @@ -14,16 +14,22 @@ import java.util.ArrayList; public class RepoTraversal { - private static final String tempFilePath = Config.getTempJavaFilePath(); - private long numFiles; private long numJavaFiles; public RepoTraversal() { } - public void findJavaFilesToParse() { - ArrayList repoURLs = getRepoURLsFromConfig(); + public void findJavaFilesToParse(String inputType, Integer limitRepos) { + ArrayList repoURLs; + + if (inputType.equals("f")) { + repoURLs = getRepoURLsFromConfig(); + } else { + GetRepoNames getRepoNames = new GetRepoNames(); + repoURLs = getRepoNames.getReposByStars(limitRepos); + } + int count = 0; for(String url : repoURLs) { System.out.println("Analyzing " + url + " (" + (++count) + "/" + repoURLs.size() + ")"); @@ -31,7 +37,9 @@ public void findJavaFilesToParse() { if (result != null) { insertRepoSummary(result); String repoName = url.split("/repos/")[1].split("/branches")[0]; - markFileAsDone(repoName, repoURLs); + if (inputType.equals("f")) { + markFileAsDone(repoName, repoURLs); + } System.out.println("Finished analyzing " + url+ " (" + (count) + "/" + repoURLs.size() + ")"); } } @@ -81,7 +89,7 @@ public void storeFileLocally(String content) { FileOutputStream fos = null; File file; try { - file = new File(tempFilePath); + file = new File(Config.getTempJavaFilePath()); if(!file.exists()) { file.createNewFile(); } @@ -220,7 +228,7 @@ private JSONObject recurseForJSONObject(JSONObject source, String[] keys) { return object; } - private JSONObject makeGetRequest(String urlString) throws CustomException { + public JSONObject makeGetRequest(String urlString) throws CustomException { try { String authToken = Config.getAuthToken(); URL url = new URL(urlString); @@ -247,7 +255,7 @@ private JSONObject makeGetRequest(String urlString) throws CustomException { throw new CustomException("Could not make get request."); } - private String getDefaultBranch(String url) { + public String getDefaultBranch(String url) { StringBuilder sb = new StringBuilder(); sb.append("/branches/"); try { From eb86ec47fd182b302cf4dfd3131414628ff0ea2e Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 14:11:42 -0800 Subject: [PATCH 05/11] Made CL message for number of repos clearer --- src/MainParser.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/MainParser.java b/src/MainParser.java index 96084c4..f1ca687 100644 --- a/src/MainParser.java +++ b/src/MainParser.java @@ -107,7 +107,8 @@ public static void main(String[] args) throws JSONException { } } else { while (limitRepos == null) { - System.out.print("How many repos do you want to fetch? Enter one or [q] to quit: "); + System.out.print("How many repos do you want to fetch (final results might not match this number)?" + + " Enter one or [q] to quit: "); String s = reader.nextLine().trim(); System.out.print("\n"); testTermination(s.toLowerCase()); From 148286c7ec39842840695d1dc2bab561c8cbc8c0 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 15:27:12 -0800 Subject: [PATCH 06/11] Connections to github isolated to its own class --- src/GetRepoNames.java | 6 ++--- src/GitHubFetcher.java | 48 ++++++++++++++++++++++++++++++++++++++++ src/RepoTraversal.java | 50 +++++------------------------------------- 3 files changed, 56 insertions(+), 48 deletions(-) create mode 100644 src/GitHubFetcher.java diff --git a/src/GetRepoNames.java b/src/GetRepoNames.java index 82edc61..6ed92b3 100644 --- a/src/GetRepoNames.java +++ b/src/GetRepoNames.java @@ -14,16 +14,16 @@ public class GetRepoNames { public ArrayList getReposByStars (int resultLimit) { ArrayList results = new ArrayList(); - RepoTraversal util = new RepoTraversal(); //The makeGetRequest() should be moved into its own class to avoid implementation here + GitHubFetcher fetcher = new GitHubFetcher(); //The makeGetRequest() should be moved into its own class to avoid implementation here try { - JSONObject result = util.makeGetRequest("https://api.github.com/search/repositories?" + + JSONObject result = fetcher.makeGetRequest("https://api.github.com/search/repositories?" + "q=language:java&sort=stars&order=desc&per_page=" + resultLimit + "&page=1"); JSONArray jsonArray = result.getJSONArray("items"); for (int i = 0, size = jsonArray.length(); i < size; i++) { JSONObject objectInArray = jsonArray.getJSONObject(i); String r = "https://api.github.com/repos/" + objectInArray.getString("full_name"); - r += util.getDefaultBranch(r); + r += fetcher.getDefaultBranch(r); results.add(r); } } catch (CustomException e) { diff --git a/src/GitHubFetcher.java b/src/GitHubFetcher.java new file mode 100644 index 0000000..ad652a2 --- /dev/null +++ b/src/GitHubFetcher.java @@ -0,0 +1,48 @@ +import config.Config; +import org.json.JSONObject; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +public class GitHubFetcher { + public JSONObject makeGetRequest(String urlString) throws CustomException { + try { + String authToken = Config.getAuthToken(); + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty("Authorization", "Bearer " + authToken); + conn.setRequestProperty("User-Agent", "code-style-mining"); + conn.setRequestProperty("Content-Type","application/json"); + conn.setDoOutput(true); + conn.setDoInput(true); + conn.setRequestMethod("GET"); + + BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); + String inputLine; + StringBuffer response = new StringBuffer(); + + while((inputLine = in.readLine()) != null) { + response.append(inputLine + "\n"); + } + in.close(); + return new JSONObject(response.toString()); + } catch (Exception e) { + e.printStackTrace(); + } + throw new CustomException("Could not make get request."); + } + + public String getDefaultBranch(String url) { + StringBuilder sb = new StringBuilder(); + sb.append("/branches/"); + try { + JSONObject response = makeGetRequest(url); + sb.append(response.getString("default_branch")); + } catch(Exception e) { + e.printStackTrace(); + } + return sb.toString(); + } +} diff --git a/src/RepoTraversal.java b/src/RepoTraversal.java index ed82a77..d38384d 100644 --- a/src/RepoTraversal.java +++ b/src/RepoTraversal.java @@ -9,13 +9,12 @@ import util.UrlFilepathPair; import java.io.*; -import java.net.HttpURLConnection; -import java.net.URL; import java.util.ArrayList; public class RepoTraversal { private long numFiles; private long numJavaFiles; + private GitHubFetcher fetcher = new GitHubFetcher(); public RepoTraversal() { } @@ -120,7 +119,7 @@ private JSONObject traverseRepoForFileContent(String repoURL) { JSONifySummary repoSummary = new JSONifySummary(numFiles, numJavaFiles); //UrlFilepathPair contains the blob url and the file path of the blob for(UrlFilepathPair url : urls) { - JSONObject content = makeGetRequest(url.getRepoBlobUrl()); + JSONObject content = fetcher.makeGetRequest(url.getRepoBlobUrl()); String contentStr = content.getString("content"); contentStr = contentStr.replaceAll("\n", ""); decodeAndParseFile(contentStr, repoURL, url.getFilePath(), repoSummary); @@ -166,7 +165,7 @@ private ArrayList traverseTreeForFileURLs(String repoURL) { try { JSONObject treeObj = getTreeObjectFromRepo(repoURL); String treeURL = treeObj.getString("url"); - JSONObject tree = makeGetRequest(treeURL + "?recursive=1"); + JSONObject tree = fetcher.makeGetRequest(treeURL + "?recursive=1"); JSONArray array = getJSONArrayByKey(tree, "tree"); for(int i = 0; i < array.length(); i++) { @@ -189,7 +188,7 @@ private ArrayList traverseTreeForFileURLs(String repoURL) { private JSONObject getTreeObjectFromRepo(String url) throws CustomException { try { - JSONObject response = makeGetRequest(url); + JSONObject response = fetcher.makeGetRequest(url); String[] keys = new String[] { "commit", "commit", "tree" }; return recurseForJSONObject(response, keys); } catch(Exception e) { @@ -228,45 +227,6 @@ private JSONObject recurseForJSONObject(JSONObject source, String[] keys) { return object; } - public JSONObject makeGetRequest(String urlString) throws CustomException { - try { - String authToken = Config.getAuthToken(); - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty("Authorization", "Bearer " + authToken); - conn.setRequestProperty("User-Agent", "code-style-mining"); - conn.setRequestProperty("Content-Type","application/json"); - conn.setDoOutput(true); - conn.setDoInput(true); - conn.setRequestMethod("GET"); - - BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); - String inputLine; - StringBuffer response = new StringBuffer(); - - while((inputLine = in.readLine()) != null) { - response.append(inputLine + "\n"); - } - in.close(); - return new JSONObject(response.toString()); - } catch (Exception e) { - e.printStackTrace(); - } - throw new CustomException("Could not make get request."); - } - - public String getDefaultBranch(String url) { - StringBuilder sb = new StringBuilder(); - sb.append("/branches/"); - try { - JSONObject response = makeGetRequest(url); - sb.append(response.getString("default_branch")); - } catch(Exception e) { - e.printStackTrace(); - } - return sb.toString(); - } - private ArrayList getRepoURLsFromConfig() { String repoURLsPath = Config.getRepoURLsPath(); ArrayList urls = new ArrayList<>(); @@ -281,7 +241,7 @@ private ArrayList getRepoURLsFromConfig() { StringBuilder sb = new StringBuilder(); sb.append("https://api.github.com/repos/"); sb.append(line); - sb.append(getDefaultBranch(sb.toString())); + sb.append(fetcher.getDefaultBranch(sb.toString())); urls.add(sb.toString()); } br.close(); From 3d79a36a65a3f8f1a054a4c035badc9d415959d9 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 17:08:52 -0800 Subject: [PATCH 07/11] Fetch repos by keywords support --- src/GetRepoNames.java | 41 +++++++++++++++++++++++++++++++++++------ src/MainParser.java | 33 +++++++++++++++++++++++++++++++-- src/RepoTraversal.java | 9 +++++++-- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/GetRepoNames.java b/src/GetRepoNames.java index 6ed92b3..6b3c9cf 100644 --- a/src/GetRepoNames.java +++ b/src/GetRepoNames.java @@ -5,16 +5,45 @@ import java.util.ArrayList; public class GetRepoNames { + public ArrayList getReposByKeywords (int resultLimit, ArrayList keywords) { + ArrayList results = new ArrayList(); + GitHubFetcher fetcher = new GitHubFetcher(); + + try { + double originalLimit = new Double (resultLimit); + + for (String word : keywords) { + //Mixes up the categories a bit among the keywords + int numRepos = (int) Math.ceil(originalLimit / keywords.size()); + numRepos = (numRepos > resultLimit)? resultLimit : numRepos; + resultLimit -= numRepos; + + if (numRepos == 0) { + break; + } + + JSONObject result = fetcher.makeGetRequest("https://api.github.com/search/repositories?q=" + + word + "+language:java&sort=stars&order=desc&per_page=" + numRepos + "&page=1"); + JSONArray jsonArray = result.getJSONArray("items"); + for (int i = 0, size = jsonArray.length(); i < size; i++) { + JSONObject objectInArray = jsonArray.getJSONObject(i); + String r = "https://api.github.com/repos/" + objectInArray.getString("full_name"); + r += fetcher.getDefaultBranch(r); + results.add(r); + } + } + } catch (CustomException e) { + e.printStackTrace(); + } catch (JSONException e) { + e.printStackTrace(); + } -// public ArrayList getReposByKeywords (int resultLimit, String [] keywords) { -// ArrayList results = new ArrayList(); -// -// return results; -// } + return results; + } public ArrayList getReposByStars (int resultLimit) { ArrayList results = new ArrayList(); - GitHubFetcher fetcher = new GitHubFetcher(); //The makeGetRequest() should be moved into its own class to avoid implementation here + GitHubFetcher fetcher = new GitHubFetcher(); try { JSONObject result = fetcher.makeGetRequest("https://api.github.com/search/repositories?" + diff --git a/src/MainParser.java b/src/MainParser.java index f1ca687..47b3458 100644 --- a/src/MainParser.java +++ b/src/MainParser.java @@ -7,6 +7,7 @@ import org.apache.commons.configuration2.ex.ConfigurationException; import org.json.JSONException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import java.util.Scanner; @@ -90,6 +91,8 @@ public static void main(String[] args) throws JSONException { String inputType = ""; Integer limitRepos = null; + ArrayList keywords = new ArrayList<>(); + String optionType = ""; while (!inputType.equals("f") && !inputType.equals("g")) { System.out.print("Do you want to get repo names from file [f] or generate them [g]. Enter one or [q] to quit: "); inputType = reader.nextLine().trim(); @@ -106,6 +109,28 @@ public static void main(String[] args) throws JSONException { configMap.put("repoURLsPath", s); } } else { + while (!optionType.equals("s") && !optionType.equals("k")) { + System.out.print("Get repo names by top stars [s] or by keywords [k]. Enter one of these or [q] to quit: "); + optionType = reader.nextLine().trim(); + System.out.print("\n"); + testTermination(optionType.toLowerCase()); + } + + if (optionType.equals("k")) { + String t = ""; + while (!t.equals("c")) { + System.out.print("Enter keyword or [c] to continue: "); + t = reader.nextLine().trim(); + System.out.print("\n"); + if (t.equals("c")) { + break; + } else if (!t.isEmpty()){ + keywords.add(t); + } + } + System.out.println("Using keywords: " + keywords); + } + while (limitRepos == null) { System.out.print("How many repos do you want to fetch (final results might not match this number)?" + " Enter one or [q] to quit: "); @@ -113,7 +138,9 @@ public static void main(String[] args) throws JSONException { System.out.print("\n"); testTermination(s.toLowerCase()); try { - limitRepos = Integer.parseInt(s); + if (Integer.parseInt(s) >= 0) { + limitRepos = Integer.parseInt(s); + } } catch (NumberFormatException e) { continue; } @@ -124,7 +151,9 @@ public static void main(String[] args) throws JSONException { configMap.get("mongoUrl"), configMap.get("mongoDatabase"), configMap.get("mongoCollection"), configMap.get("tempJavaFilePath"), configMap.get("tempJSONFilePath"), configMap.get("repoURLsPath")); - traverser.findJavaFilesToParse(inputType, limitRepos); + //TODO might want to handle these options more dynamically. + // In some cases you dont need all of these put they are passed anyway + traverser.findJavaFilesToParse(inputType, optionType, limitRepos, keywords); } /*public static void getGitFiles(String url, String directory) { diff --git a/src/RepoTraversal.java b/src/RepoTraversal.java index d38384d..b88e865 100644 --- a/src/RepoTraversal.java +++ b/src/RepoTraversal.java @@ -19,14 +19,19 @@ public class RepoTraversal { public RepoTraversal() { } - public void findJavaFilesToParse(String inputType, Integer limitRepos) { + public void findJavaFilesToParse(String inputType, String optionType, Integer limitRepos, ArrayList keywords) { ArrayList repoURLs; if (inputType.equals("f")) { repoURLs = getRepoURLsFromConfig(); } else { GetRepoNames getRepoNames = new GetRepoNames(); - repoURLs = getRepoNames.getReposByStars(limitRepos); + + if (optionType.equals("s")) { + repoURLs = getRepoNames.getReposByStars(limitRepos); + } else { + repoURLs = getRepoNames.getReposByKeywords(limitRepos, keywords); + } } int count = 0; From e97102d42af3c7a8cfe165ef2e8b9906a2703850 Mon Sep 17 00:00:00 2001 From: Bryce Vonilten Date: Wed, 11 Dec 2019 17:18:10 -0800 Subject: [PATCH 08/11] Mentioned in the readme that you can input credentials via the CL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3d7da5e..a48ecba 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This project aims at mining public software repositories on GitHub in order to m Properties file: The properties file provides a way to supply information for running the Java code without pushing private or unnecessary data to the repository. One needs to create a file called javaAnalysis.properties file in the same directory as the project (above src). -The config.Config class communicates with this .properties file to supply information to the Java project. +If information is left out but is necessary for running, it will ask for these on the command line. The structure of the .properties file is as follows (don't include the brackets): From 8de0688cea3b290362ec3f1fb8fdceb94f6f2285 Mon Sep 17 00:00:00 2001 From: Tammy Date: Sun, 24 Oct 2021 20:57:14 -0700 Subject: [PATCH 09/11] rate limit functionality --- src/GitHubFetcher.java | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) mode change 100644 => 100755 src/GitHubFetcher.java diff --git a/src/GitHubFetcher.java b/src/GitHubFetcher.java old mode 100644 new mode 100755 index ad652a2..9b4f4e4 --- a/src/GitHubFetcher.java +++ b/src/GitHubFetcher.java @@ -5,6 +5,8 @@ import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; +import java.time.Instant; +import java.util.concurrent.TimeUnit; public class GitHubFetcher { public JSONObject makeGetRequest(String urlString) throws CustomException { @@ -18,6 +20,11 @@ public JSONObject makeGetRequest(String urlString) throws CustomException { conn.setDoOutput(true); conn.setDoInput(true); conn.setRequestMethod("GET"); + + if (conn.getResponseCode() == 403) { + reachedAPIRateLimit(); + makeGetRequest(urlString); + } BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); String inputLine; @@ -34,6 +41,33 @@ public JSONObject makeGetRequest(String urlString) throws CustomException { throw new CustomException("Could not make get request."); } + public void reachedAPIRateLimit() throws CustomException { + String urlString = "https://api.github.com/rate_limit"; + try { + JSONObject respJSON = makeGetRequest(urlString); + int coreRemaining = respJSON.getJSONObject("resources").getJSONObject("core").getInt("remaining"); + int searchRemaining = respJSON.getJSONObject("resources").getJSONObject("search").getInt("remaining"); + if (coreRemaining == 0) { + int resetTime = respJSON.getJSONObject("resources").getJSONObject("core").getInt("reset"); + int currentTime = (int) Instant.now().getEpochSecond(); + int sleepTime = resetTime - currentTime; + if (sleepTime > 0) { + sleepTime += (60 * 5); + System.out.println( + "reached API rate limit of 5000 per hour... sleeping for " + sleepTime + + " seconds (" + (sleepTime/60) + " minutes)" + ); + TimeUnit.SECONDS.sleep(sleepTime); + } + } else if (searchRemaining == 0) { + System.out.println("reached minute API rate limit of 30 per minute... sleeping for 60 seconds"); + TimeUnit.SECONDS.sleep(60); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + public String getDefaultBranch(String url) { StringBuilder sb = new StringBuilder(); sb.append("/branches/"); From 78dc365fc99ce73ace7832fefcd3bfa800c44283 Mon Sep 17 00:00:00 2001 From: Bruno da Silva Date: Thu, 29 Sep 2022 15:15:10 -0700 Subject: [PATCH 10/11] Properties file --- pythonAnalysis.properties | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 pythonAnalysis.properties diff --git a/pythonAnalysis.properties b/pythonAnalysis.properties new file mode 100644 index 0000000..8406509 --- /dev/null +++ b/pythonAnalysis.properties @@ -0,0 +1,7 @@ +authToken = +repoURLsPath= +mongoUsername = +mongoPassword = +mongoUrl = +mongoDatabase = +mongoCollection = \ No newline at end of file From 6bf7114263301ac3a9a283845c5c0d32f49b1c1e Mon Sep 17 00:00:00 2001 From: Bruno C da Silva Date: Thu, 29 Sep 2022 15:20:50 -0700 Subject: [PATCH 11/11] Updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3ae3add..027b86d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ target/ javaAnalysis.properties .idea/ *.iml +*.properties \ No newline at end of file