diff --git a/JAR/jcommander.jar b/JAR/jcommander.jar new file mode 100644 index 0000000..a05f017 Binary files /dev/null and b/JAR/jcommander.jar differ diff --git a/JAR/yamlbeans-1.06.jar b/JAR/yamlbeans-1.06.jar new file mode 100644 index 0000000..7a98cdf Binary files /dev/null and b/JAR/yamlbeans-1.06.jar differ diff --git a/src/commands/CrawlCommand.java b/src/commands/CrawlCommand.java new file mode 100644 index 0000000..348e4ce --- /dev/null +++ b/src/commands/CrawlCommand.java @@ -0,0 +1,68 @@ +package commands; + +import com.beust.jcommander.Parameter; +import com.beust.jcommander.Parameters; +import configuration.ConfigurationService; +import configuration.CrawlerConfig; +import main.GitHubCrawler; + +/** + * The Crawl Command executes the GitHubCrawler with the settings passed through the commandline. + * + * @author Ali Bozna + */ +@Parameters(separators = "=") +public class CrawlCommand { + @Parameter(names = "--language", description = "") + private String language; + + @Parameter(names = { "--stars", "--stars-decrease-amount" }, description = "", required = true) + private int starDecreaseAmount; + + @Parameter(names = { "--build-system", "--bs" }, description = "", required = false) + private String buildSystem; + + /** + * Gets the language. + * + * @return Returns the language. + */ + public String getLanguage() { + return this.language; + } + + /** + * Gets the stars decrease amount. + * + * @return Returns the stars decrease amount. + */ + public int getStarsDecreaseAmount() { + return this.starDecreaseAmount; + } + + /** + * Gets the Build-System. + * + * @return Returns the Build-System. + */ + public String getBuildSystem() { + return this.buildSystem; + } + + /** + * Executes the GitHubCrawler with the settings passed through the commandline. Also it loads the + * configured Build-Systems from the configuration file. + */ + public void execute() { + CrawlerConfig config = new CrawlerConfig(); + config.starDecreaseAmount = this.getStarsDecreaseAmount(); + config.buildSystem = this.getBuildSystem(); + config.language = this.getLanguage(); + + // Get the Build-Systems from the configuration file. + config.buildSystems = new ConfigurationService().getConfig().buildSystems; + + GitHubCrawler crawler = new GitHubCrawler(config); + crawler.run(); + } +} diff --git a/src/configuration/BuildSystem.java b/src/configuration/BuildSystem.java new file mode 100644 index 0000000..f48e7c2 --- /dev/null +++ b/src/configuration/BuildSystem.java @@ -0,0 +1,34 @@ +package configuration; + +import java.util.List; + +/** + * Defining Build System with public name and buildFiles fields so the objects can be + * serialized. The end-user has the option to define custom build systems. + * + * @author Ali Bozna + */ +public class BuildSystem { + public static BuildSystem UNKOWN = new BuildSystem("UNKOWN", new String[]{}); + + public String name; + public String[] buildFiles; + + private List filePaths; + + public BuildSystem(String name, String[] buildFiles) { + this.buildFiles = buildFiles; + this.name = name; + } + + public BuildSystem() { + } + + public List getFilePaths() { + return this.filePaths; + } + + public void setFilePaths(List filePaths) { + this.filePaths = filePaths; + } +} diff --git a/src/configuration/ConfigurationService.java b/src/configuration/ConfigurationService.java new file mode 100644 index 0000000..e361d05 --- /dev/null +++ b/src/configuration/ConfigurationService.java @@ -0,0 +1,89 @@ +package configuration; + +import com.esotericsoftware.yamlbeans.YamlReader; +import com.esotericsoftware.yamlbeans.YamlWriter; + +import java.io.FileReader; +import java.io.FileWriter; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; + +/** + * @author Ali Bozna + */ +public class ConfigurationService { + + public ConfigurationService() { + } + + /** + * Deserializes the @see CrawlerConfig from the user path. + * + * @return Returns the @see CrawlerConfig. + */ + public CrawlerConfig getConfig() { + Path configPath = Paths.get(System.getProperty("user.home"), "crawler", "config.yaml"); + + System.out.println(configPath.toFile().getAbsolutePath()); + + // TODO If the configuration file is empty it will still deserialize it instead of failing. + try(FileReader fileReader = new FileReader(configPath.toFile())) { + YamlReader reader = new YamlReader(fileReader); + CrawlerConfig config = (CrawlerConfig)reader.read(); + reader.close(); + + return config; + + } catch(Exception exception) { + System.err.println(exception.getMessage()); + } + + // If the configuration deserialization failed create overwrite the configuration with + // some default configuration. + CrawlerConfig config = new CrawlerConfig(); + config.oauthtoken = ""; + config.language = "CPP"; + config.lastPushDate = DateTimeFormatter.ofPattern("YYYY-MM-dd").format(LocalDateTime.now()); + config.starDecreaseAmount = 1; + config.buildSystem = "CUSTOM"; + config.customFile = "CHANGELOG.md"; + config.filePath = "shared"; + config.buildSystems = this.getDefaultBuildSystems(); + + try { + configPath.toFile().getParentFile().mkdirs(); + } catch (Exception exception) { + System.err.println(exception.getMessage()); + } + + // Save the created default configuration. + try(FileWriter fileWriter = new FileWriter(configPath.toFile())) { + YamlWriter writer = new YamlWriter(fileWriter); + writer.write(config); + writer.close(); + + } catch(Exception exception) { + System.err.println(exception.getMessage()); + } + + return config; + } + + /** + * @return Returns default build-systems. + */ + private List getDefaultBuildSystems() { + List systems = new ArrayList(); + systems.add(new BuildSystem("CMAKE", new String[] { "CMakeLists.txt" })); + systems.add(new BuildSystem("AUTOTOOLS", new String[] { "configure.ac", "configure.in", "Makefile.am" })); + systems.add(new BuildSystem("MAKE", new String[] { "Makefile" })); + systems.add(new BuildSystem("CUSTOM", new String[] { "" })); + systems.add(BuildSystem.UNKOWN); + + return systems; + } +} diff --git a/src/configuration/CrawlerConfig.java b/src/configuration/CrawlerConfig.java new file mode 100644 index 0000000..c7ce503 --- /dev/null +++ b/src/configuration/CrawlerConfig.java @@ -0,0 +1,29 @@ +package configuration; + + +import java.util.List; + +/** + * Configuration file that will be de/serialized from/to Yaml file. + * + * @author Ali Bozna + */ +public class CrawlerConfig { + public String language; + + public String lastPushDate; + + public int starDecreaseAmount; + + public String oauthtoken; + + public String filePath; + + public String jsonFileName; + + public String customFile; + + public String buildSystem; + + public List buildSystems; +} diff --git a/src/main/Config.java b/src/main/Config.java index eca19b2..0420f14 100644 --- a/src/main/Config.java +++ b/src/main/Config.java @@ -6,14 +6,14 @@ public class Config { - public static final String LANGUAGE = PropertyFileReader.getInstance().getProperty(EConfig.LANGUAGE); - public static final String LASTPUSHEDDATE = PropertyFileReader.getInstance().getProperty(EConfig.LASTPUSHEDDATE); - public static final String STARSDECREASEAMOUNT = PropertyFileReader.getInstance().getProperty(EConfig.STARSDECREASEAMOUNT); - public static final BuildSystem BUILDSYSTEM = BuildSystem.getBuildType(PropertyFileReader.getInstance().getProperty(EConfig.BUILDSYSTEM)); - public static final String OAUTHTOKEN = PropertyFileReader.getInstance().getProperty(EConfig.OAUTHTOKEN); - public static final String FILEPATH = PropertyFileReader.getInstance().getProperty(EConfig.FILEPATH); - public static final String JSONFILENAME = "repositories.json"; - public static final String CUSTOMFILE = PropertyFileReader.getInstance().getProperty(EConfig.CUSTOMFILE); + public static final String LANGUAGE = null;//PropertyFileReader.getInstance().getProperty(EConfig.LANGUAGE); + public static final String LASTPUSHEDDATE = null;//PropertyFileReader.getInstance().getProperty(EConfig.LASTPUSHEDDATE); + public static final String STARSDECREASEAMOUNT = null;//PropertyFileReader.getInstance().getProperty(EConfig.STARSDECREASEAMOUNT); + public static final BuildSystem BUILDSYSTEM = null;//BuildSystem.getBuildType(PropertyFileReader.getInstance().getProperty(EConfig.BUILDSYSTEM)); + public static final String OAUTHTOKEN = null;//PropertyFileReader.getInstance().getProperty(EConfig.OAUTHTOKEN); + public static final String FILEPATH = null;//PropertyFileReader.getInstance().getProperty(EConfig.FILEPATH); + public static final String JSONFILENAME = null;//"repositories.json"; + public static final String CUSTOMFILE = null;//PropertyFileReader.getInstance().getProperty(EConfig.CUSTOMFILE); } diff --git a/src/main/GitHubCrawler.java b/src/main/GitHubCrawler.java index 1fbad77..4b17292 100644 --- a/src/main/GitHubCrawler.java +++ b/src/main/GitHubCrawler.java @@ -1,8 +1,9 @@ package main; -import Models.BuildSystem; +import configuration.BuildSystem; import Models.RMetaData; import com.google.common.util.concurrent.RateLimiter; +import configuration.CrawlerConfig; import org.eclipse.egit.github.core.*; import org.eclipse.egit.github.core.client.GitHubClient; import org.eclipse.egit.github.core.client.PageIterator; @@ -25,18 +26,19 @@ public class GitHubCrawler { /** * The filtered programming language. */ - private String searchLanguage; + // private String searchLanguage; /** * The BuildSystem to detect and filter for. */ - private BuildSystem buildSystem; + // private BuildSystem buildSystem; /** * The GitHub client object. */ private GitHubClient client; - private String lastPushedDate; + // private String lastPushedDate; + // private int starDecreaseAmount; + private int maxStars = Integer.MAX_VALUE; - private int starDecreaseAmount; private int matchingRepos = 0; private int checkedRepos = 0; private int counterSearchRequests = 0; @@ -57,29 +59,37 @@ public class GitHubCrawler { private Calendar calendar; private SimpleDateFormat formatter; + private CrawlerConfig config; + private BuildSystem selectedBuildSystem; + /** * Crawlers Constructor. - * @param language The programming language filter. - * @param buildSystem The build system filter. - * @param oAuthToken The Github OAuth token for authentication. */ - public GitHubCrawler(String language, String lastPushedDate, String starsDecreaseAmount , BuildSystem buildSystem, String oAuthToken){ + // public GitHubCrawler(String language, String lastPushedDate, String starsDecreaseAmount , BuildSystem buildSystem, String oAuthToken){ + public GitHubCrawler(CrawlerConfig config) { + this.config = config; + this.selectedBuildSystem = (configuration.BuildSystem)this.config.buildSystems + .stream() + .filter(s -> ((configuration.BuildSystem)s).name.equals(this.config.buildSystem)) + .findFirst() + .orElse(new configuration.BuildSystem("UNKNOWN", new String[]{})); + this.calendar = Calendar.getInstance(); this.formatter = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss"); this.systemStartTime = formatter.format(calendar.getTime()); this.startTime = System.nanoTime(); - this.searchLanguage = language; - this.lastPushedDate = lastPushedDate; - this.buildSystem = buildSystem; - this.client = authenticate(oAuthToken); + // this.searchLanguage = language; + // this.lastPushedDate = lastPushedDate; + // this.buildSystem = buildSystem; + + this.client = authenticate(config.oauthtoken); initGitHubServices(); printSetup(); calcRequestLimits(); try { - this.starDecreaseAmount = Integer.parseInt(starsDecreaseAmount); - if(starDecreaseAmount <= 0){ + if(config.starDecreaseAmount <= 0){ System.err.println("starsDecreaseAmount must be greater 0. Config file not properly set up.\nShutting down."); System.exit(1); } @@ -92,14 +102,16 @@ public GitHubCrawler(String language, String lastPushedDate, String starsDecreas private void printSetup() { System.out.println("----------CONFIGURATION----------"); - System.out.println("BuildSystem: " + Config.BUILDSYSTEM); - if(BuildSystem.CUSTOM == buildSystem) - System.out.println("Searching for custom file: " + Config.CUSTOMFILE); - System.out.println("Repository language: " + Config.LANGUAGE); - if(Config.FILEPATH.isEmpty()) + System.out.println("BuildSystem: " + config.buildSystem); + //if(BuildSystem.CUSTOM == buildSystem) + // System.out.println("Searching for custom file: " + Config.CUSTOMFILE); + + System.out.println("Repository language: " + config.language); + + if(config.filePath.isEmpty()) System.out.println("Output is written to: " + System.getProperty("user.dir")); else - System.out.println("Output is written to: " + System.getProperty("user.dir") + "/" + Config.FILEPATH); + System.out.println("Output is written to: " + System.getProperty("user.dir") + "/" + config.filePath); System.out.println("---------------------------------"); } @@ -112,7 +124,7 @@ private void initGitHubServices() { private void calcRequestLimits() { //Request limit values are defined here : https://developer.github.com/v3/#rate-limiting //Search Request limit values are defined here: https://developer.github.com/v3/search/#rate-limit - if(Config.OAUTHTOKEN.equals("")) { + if(config.oauthtoken.equals("")) { requestRateLimiter = RateLimiter.create(60d/3600d); searchRequestRateLimiter = RateLimiter.create(10d/60d); } else { // assuming correct token was provided! @@ -175,9 +187,9 @@ private GitHubClient authenticate(String oAuthToken) { */ private Map buildSearchQuery() { Map searchQuery = new HashMap(); - searchQuery.put("language", searchLanguage); //Search for repos with given searchlLanguage set in the config file + searchQuery.put("language", this.config.language); //Search for repos with given searchlLanguage set in the config file searchQuery.put("is", "public"); //Search for repos that are public - searchQuery.put("pushed", ">=" + lastPushedDate); // The pushed qualifier will return a list of repositories, sorted by the most recent commit made on any branch in the repository. + searchQuery.put("pushed", ">=" + this.config.lastPushDate); // The pushed qualifier will return a list of repositories, sorted by the most recent commit made on any branch in the repository. searchQuery.put("sort", "stars"); if(maxStars != Integer.MAX_VALUE && maxStars > 0 && foundRepoInLastQuery) { @@ -185,8 +197,8 @@ private Map buildSearchQuery() { System.out.println("Querying repositories with maximum number of stars of '" + maxStars + "' from last repository of previous query."); searchQuery.put("stars", "<=" + (maxStars)); } else if(!foundRepoInLastQuery && maxStars != Integer.MAX_VALUE) { - System.out.println("No repository was found within the last 1000 crawled repositories.\nDecreasing the current stars count of "+maxStars+" by "+starDecreaseAmount+"."); - maxStars = maxStars - starDecreaseAmount; + System.out.println("No repository was found within the last 1000 crawled repositories.\nDecreasing the current stars count of "+maxStars+" by "+ this.config.starDecreaseAmount +"."); + maxStars = maxStars - this.config.starDecreaseAmount; searchQuery.put("stars", "<=" + maxStars); } else if(!foundRepoInLastQuery && maxStars == Integer.MAX_VALUE && notFirstQuery) { //NOTE: This case is ignored. If we do not find any popular repository within the first 1000 repositories, @@ -297,14 +309,17 @@ private void filterRepositories(Map searchQuery) { System.out.println("Current maximum stars count: " + maxStars); checkedRepos++; //Detect BuildSystem subroutine + BuildSystem foundBuildSystem = getFileContentsAtRootDir(repositoryOfOwnerAndName); - if (foundBuildSystem == buildSystem) { //BuildSystem was detected. Create a new RMetaData object and store all information + + if (foundBuildSystem.name.equals(this.config.buildSystem)) { //BuildSystem was detected. Create a new RMetaData object and store all information matchingRepos++; foundRepoInLastQuery = true; System.err.println("Overall detected repos: " + matchingRepos); RMetaData metaDataObject = createRMetaDataObject(repositoryOfOwnerAndName, foundBuildSystem); JsonWriter.getInstance().writeRepositoryToJson(metaDataObject); } + System.out.println("Remaining Request: " + client.getRemainingRequests()); } } @@ -374,7 +389,7 @@ private String getLatestCommitId(Repository repository){ * @return The detected BuildSystem */ private BuildSystem getFileContentsAtRootDir(Repository repository) { - BuildSystem detectedBuildSystem = BuildSystem.UNKNOWN; + BuildSystem detectedBuildSystem = BuildSystem.UNKOWN; List filePaths = new ArrayList<>(); try { @@ -382,40 +397,44 @@ private BuildSystem getFileContentsAtRootDir(Repository repository) { List repositoryContents = contentsService.getContents(repository); // contentsService.getContents(repository, "path/to/folder"); //TODO: use this function to search for files on specific path! counterContentRequests++; - switch (buildSystem) { - case CMAKE: - if(repositoryContents.stream().anyMatch(o -> o.getName().equals(BuildSystem.CMAKE.getBuildFiles()[0]))) { + + // TODO This must be changed for end-users that define custom Build-Systems + switch (this.selectedBuildSystem.name) { + case "CMAKE": + if(repositoryContents.stream().anyMatch(o -> o.getName().equals(selectedBuildSystem.buildFiles[0]))) { // filePaths.add(""); // set here the filepath to the given file. //check github library docu for requesting all files within a given path! - detectedBuildSystem = BuildSystem.CMAKE; + detectedBuildSystem = selectedBuildSystem; } break; - case AUTOTOOLS://((configure.ac || configure.in) && Makefile.am)) - if(((repositoryContents.stream().anyMatch(o -> o.getName().equals(BuildSystem.AUTOTOOLS.getBuildFiles()[0])) || - repositoryContents.stream().anyMatch(o -> o.getName().equals(BuildSystem.AUTOTOOLS.getBuildFiles()[1]))) && - repositoryContents.stream().anyMatch(o -> o.getName().equals(BuildSystem.AUTOTOOLS.getBuildFiles()[2])))) { - detectedBuildSystem = BuildSystem.AUTOTOOLS; + case "AUTOTOOLS"://((configure.ac || configure.in) && Makefile.am)) + if(((repositoryContents.stream().anyMatch(o -> o.getName().equals(selectedBuildSystem.buildFiles[0])) || + repositoryContents.stream().anyMatch(o -> o.getName().equals(selectedBuildSystem.buildFiles[1]))) && + repositoryContents.stream().anyMatch(o -> o.getName().equals(selectedBuildSystem.buildFiles[2])))) { + detectedBuildSystem = selectedBuildSystem; } break; - case MAKE: - if(repositoryContents.stream().anyMatch(o -> o.getName().equals(BuildSystem.MAKE.getBuildFiles()[0]))) { - detectedBuildSystem = BuildSystem.MAKE; + case "MAKE": + if(repositoryContents.stream().anyMatch(o -> o.getName().equals(selectedBuildSystem.buildFiles[0]))) { + detectedBuildSystem = selectedBuildSystem; } break; - case CUSTOM: + case "CUSTOM": if(repositoryContents.stream().anyMatch(o -> o.getName().equals(Config.CUSTOMFILE))) { - detectedBuildSystem = BuildSystem.CUSTOM; + detectedBuildSystem = selectedBuildSystem; } break; default: System.out.println("Running default"); - detectedBuildSystem = BuildSystem.UNKNOWN; + detectedBuildSystem = BuildSystem.UNKOWN; break; } + } catch (IOException e) { System.err.println("Something went wrong while querying the repository contents.\n"); System.err.println(e.getMessage()); } + detectedBuildSystem.setFilePaths(filePaths); return detectedBuildSystem; } diff --git a/src/main/MainCrawler.java b/src/main/MainCrawler.java index d49976b..2ee0ef1 100644 --- a/src/main/MainCrawler.java +++ b/src/main/MainCrawler.java @@ -1,5 +1,9 @@ package main; +import com.beust.jcommander.JCommander; +import commands.CrawlCommand; +import configuration.ConfigurationService; + /** * Main entry point of the whole program. * @@ -8,10 +12,30 @@ public class MainCrawler { public static void main(String[] args) { - //Init crawler with configuration. - GitHubCrawler crawler = new GitHubCrawler(Config.LANGUAGE, Config.LASTPUSHEDDATE, Config.STARSDECREASEAMOUNT, Config.BUILDSYSTEM, Config.OAUTHTOKEN); - //Start the crawler. - crawler.run(); + if (args.length > 0) { + // Arguments over configuration file + try { + CrawlCommand crawlCmd = new CrawlCommand(); + + JCommander commander = new JCommander(); + commander.addCommand("crawl", crawlCmd); + + commander.parse(args); + String parsed = commander.getParsedCommand(); + + crawlCmd.execute(); + } catch(Exception exception) { + System.err.println("Failed to start Crawler through Commandline..."); + System.err.println(exception.getMessage()); + // Run the GitHubCrawler with the configuration file + GitHubCrawler crawler = new GitHubCrawler(new ConfigurationService().getConfig()); + crawler.run(); + } + } + else { + GitHubCrawler crawler = new GitHubCrawler(new ConfigurationService().getConfig()); + crawler.run(); + } } }