-
-
Notifications
You must be signed in to change notification settings - Fork 1
Initial commit - Spring Batch Importer Pipeline #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| .gitignore | ||
| /importer/nbproject/private/ | ||
| /importer/dist/ | ||
| /importer/build/ | ||
| /importer/target/ | ||
| /importer/src/main/resources/application.properties | ||
| /importer/nbproject/ | ||
| /importer/nb-configuration.xml |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
| <modelVersion>4.0.0</modelVersion> | ||
| <name>cBioPortal Importer Pipeline</name> | ||
| <description>Spring Batch importer pipeline</description> | ||
| <artifactId>importer</artifactId> | ||
| <version>0.1.0</version> | ||
|
|
||
| <parent> | ||
| <groupId>org.cbio.portal.pipelines</groupId> | ||
| <artifactId>master</artifactId> | ||
| <version>0.1.0</version> | ||
| </parent> | ||
|
|
||
|
|
||
| <dependencies> | ||
| <dependency> | ||
| <groupId>org.mskcc.cbio</groupId> | ||
| <artifactId>model</artifactId> | ||
| <version>0.1.0</version> | ||
| <type>jar</type> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>org.mskcc.cbio</groupId> | ||
| <artifactId>persistence-jdbc</artifactId> | ||
| <version>0.1.0</version> | ||
| <type>jar</type> | ||
| </dependency> | ||
|
|
||
| <dependency> | ||
| <groupId>org.springframework</groupId> | ||
| <artifactId>spring-web</artifactId> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>commons-lang</groupId> | ||
| <artifactId>commons-lang</artifactId> | ||
| <version>2.4</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>commons-cli</groupId> | ||
| <artifactId>commons-cli</artifactId> | ||
| <version>1.3</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>commons-collections</groupId> | ||
| <artifactId>commons-collections</artifactId> | ||
| <type>jar</type> | ||
| </dependency> | ||
|
|
||
| <!-- for Spring JDBC, BasicDataSource usage --> | ||
| <dependency> | ||
| <groupId>org.springframework</groupId> | ||
| <artifactId>spring-jdbc</artifactId> | ||
| <type>jar</type> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>commons-dbcp</groupId> | ||
| <artifactId>commons-dbcp</artifactId> | ||
| <type>jar</type> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>mysql</groupId> | ||
| <artifactId>mysql-connector-java</artifactId> | ||
| </dependency> | ||
|
|
||
| <dependency> | ||
| <groupId>com.google.guava</groupId> | ||
| <artifactId>guava</artifactId> | ||
| <version>19.0</version> | ||
| <type>jar</type> | ||
| </dependency> | ||
| </dependencies> | ||
|
|
||
|
|
||
| <build> | ||
| <!-- this plugin will allow us to share resources with children --> | ||
| <!-- <plugins> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-resources-plugin</artifactId> | ||
| <version>2.6</version> | ||
| </plugin> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-site-plugin</artifactId> | ||
| <version>3.4</version> | ||
| </plugin> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-release-plugin</artifactId> | ||
| <version>2.5.3</version> | ||
| </plugin> | ||
| </plugins>--> | ||
|
|
||
| <pluginManagement> | ||
| <plugins> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-compiler-plugin</artifactId> | ||
| <version>3.5.1</version> | ||
| <configuration> | ||
| <source>1.8</source> | ||
| <target>1.8</target> | ||
| </configuration> | ||
| </plugin> | ||
| </plugins> | ||
| </pluginManagement> | ||
| </build> | ||
| <repositories> | ||
| <repository> | ||
| <id>model-mvn-repo</id> | ||
| <url>https://raw.github.com/angelicaochoa/persistence/model-0.1.0-mvn-repo/</url> | ||
| <snapshots> | ||
| <enabled>true</enabled> | ||
| <updatePolicy>always</updatePolicy> | ||
| </snapshots> | ||
| </repository> | ||
| <repository> | ||
| <id>persistence-mybatis-mvn-repo</id> | ||
| <url>https://raw.github.com/angelicaochoa/persistence/persistence-mybatis-0.1.0-mvn-repo/</url> | ||
| <snapshots> | ||
| <enabled>true</enabled> | ||
| <updatePolicy>always</updatePolicy> | ||
| </snapshots> | ||
| </repository> | ||
| <repository> | ||
| <id>persistence-jdbc-mvn-repo</id> | ||
| <url>https://raw.github.com/angelicaochoa/persistence/persistence-jdbc-0.1.0-mvn-repo/</url> | ||
| <snapshots> | ||
| <enabled>true</enabled> | ||
| <updatePolicy>always</updatePolicy> | ||
| </snapshots> | ||
| </repository> | ||
| </repositories> | ||
| </project> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| /* | ||
| * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. | ||
| * | ||
| * This library is distributed in the hope that it will be useful, but WITHOUT | ||
| * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS | ||
| * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder | ||
| * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no | ||
| * obligations to provide maintenance, support, updates, enhancements or | ||
| * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be | ||
| * liable to any party for direct, indirect, special, incidental or | ||
| * consequential damages, including lost profits, arising out of the use of this | ||
| * software and its documentation, even if Memorial Sloan-Kettering Cancer | ||
| * Center has been advised of the possibility of such damage. | ||
| */ | ||
|
|
||
| /* | ||
| * This file is part of cBioPortal. | ||
| * | ||
| * cBioPortal is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU Affero General Public License as | ||
| * published by the Free Software Foundation, either version 3 of the | ||
| * License. | ||
| * | ||
| * This program is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU Affero General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU Affero General Public License | ||
| * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| package org.cbio.portal.pipelines.importer; | ||
|
|
||
| import org.cbio.portal.pipelines.importer.config.BatchConfiguration; | ||
|
|
||
| import java.io.*; | ||
| import java.util.*; | ||
| import org.apache.commons.cli.*; | ||
| import org.apache.commons.logging.*; | ||
|
|
||
| import org.springframework.boot.SpringApplication; | ||
| import org.springframework.boot.autoconfigure.SpringBootApplication; | ||
| import org.springframework.context.ConfigurableApplicationContext; | ||
| import org.springframework.batch.core.*; | ||
| import org.springframework.batch.core.launch.JobLauncher; | ||
|
|
||
| /** | ||
| * | ||
| * @author ochoaa | ||
| */ | ||
| @SpringBootApplication | ||
| public class ImporterPipeline { | ||
|
|
||
| private static final Log LOG = LogFactory.getLog(ImporterPipeline.class); | ||
|
|
||
| /** | ||
| * Builds the command line option definitions used by the importer: | ||
| * {@code -h/--help} (flag), {@code -i/--import_study} (takes a value) and | ||
| * {@code -d/--delete_study} (takes a value). | ||
| * | ||
| * @param args raw command line arguments; not consulted when building the option set | ||
| * @return the populated option definitions | ||
| */ | ||
| private static Options getOptions(String[] args) { | ||
| Options cliOptions = new Options(); | ||
| // plain flag, carries no value | ||
| cliOptions.addOption("h", "help", false, "shows this help document and quits."); | ||
| // the remaining options each require a value | ||
| cliOptions.addOption("i", "import_study", true, "Cancer study directory to import"); | ||
| cliOptions.addOption("d", "delete_study", true, "Cancer study identifier for deleting study"); | ||
| return cliOptions; | ||
| } | ||
|
|
||
| /** | ||
| * Prints the usage summary for the given options and then terminates the JVM. | ||
| * | ||
| * @param gnuOptions option definitions to describe in the usage output | ||
| * @param exitStatus status code handed to {@link System#exit(int)} | ||
| */ | ||
| private static void help(Options gnuOptions, int exitStatus) { | ||
| new HelpFormatter().printHelp("ImporterPipeline", gnuOptions); | ||
| // does not return normally | ||
| System.exit(exitStatus); | ||
| } | ||
|
|
||
| /** | ||
| * Starts the Spring application, looks up the batch study importer job and runs it | ||
| * against the given directory. An error is logged when the job ends with exit code | ||
| * {@code STOPPED}. | ||
| * | ||
| * @param args command line arguments forwarded to {@link SpringApplication#run(String...)} | ||
| * @param stagingDirectory path passed to the job as the {@code stagingDirectory} job parameter | ||
| * @throws Exception if the application context cannot be started or the job cannot be launched | ||
| */ | ||
| private static void launchImporterJob(String[] args, String stagingDirectory) throws Exception { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets remove all references to stagingDirectory - replace with studyDirectory. |
||
|
|
||
| SpringApplication app = new SpringApplication(ImporterPipeline.class); | ||
| // try-with-resources closes the context even when bean lookup or the job launch throws | ||
| try (ConfigurableApplicationContext ctx = app.run(args)) { | ||
| JobLauncher jobLauncher = ctx.getBean(JobLauncher.class); | ||
| Job batchImporterJob = ctx.getBean(BatchConfiguration.BATCH_STUDY_IMPORTER_JOB, Job.class); | ||
|
|
||
| // NOTE(review): the current date is presumably added so every launch gets distinct job parameters - confirm | ||
| JobParameters jobParameters = new JobParametersBuilder() | ||
| .addString("stagingDirectory", stagingDirectory) | ||
| .addDate("date", new Date()) | ||
| .toJobParameters(); | ||
|
|
||
| JobExecution jobExecution = jobLauncher.run(batchImporterJob, jobParameters); | ||
| // constant-first comparison stays safe if the exit code is ever null | ||
| if ("STOPPED".equals(jobExecution.getExitStatus().getExitCode())) { | ||
| LOG.error("Error importing cancer study."); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private static void launchDeleteStudyJob(String[] args, String cancerStudyIdentifier) throws Exception { | ||
| SpringApplication app = new SpringApplication(ImporterPipeline.class); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor formatting issue - none of the other methods in this class have a space following the method signature. |
||
| ConfigurableApplicationContext ctx = app.run(args); | ||
| JobLauncher jobLauncher = ctx.getBean(JobLauncher.class); | ||
|
|
||
| Job deleteStudyJob = ctx.getBean(BatchConfiguration.DELETE_CANCER_STUDY_JOB, Job.class); | ||
|
|
||
| JobParameters jobParameters = new JobParametersBuilder() | ||
| .addString("cancerStudyIdentifier", cancerStudyIdentifier) | ||
| .toJobParameters(); | ||
|
|
||
| JobExecution jobExecution = jobLauncher.run(deleteStudyJob, jobParameters); | ||
| ctx.close(); | ||
| } | ||
|
|
||
| public static void main(String[] args) throws Exception { | ||
| Options gnuOptions = ImporterPipeline.getOptions(args); | ||
| CommandLineParser parser = new GnuParser(); | ||
| CommandLine commandLine = parser.parse(gnuOptions, args); | ||
| if (commandLine.hasOption("h") || | ||
| (!commandLine.hasOption("i") && !commandLine.hasOption("d"))) { | ||
| help(gnuOptions, 0); | ||
| } | ||
|
|
||
| if (commandLine.hasOption("d")) { | ||
| String cancerStudyIdentifier = commandLine.getOptionValue("d"); | ||
| launchDeleteStudyJob(args, cancerStudyIdentifier); | ||
| } | ||
|
|
||
| if (commandLine.hasOption("i")) { | ||
| String stagingDirectory = commandLine.getOptionValue("i"); | ||
| if (!(new File(stagingDirectory).exists())) { | ||
| LOG.error("Staging directory does not exist - please check argument: " + stagingDirectory); | ||
| System.exit(2); | ||
| } | ||
| launchImporterJob(args, stagingDirectory); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should support a directory of subdirectories. |
||
| } | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would change all packages to be consistent with the primary portal project. I would also shorten it: org.cbioportal.importer.