Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# NOTE(review): this entry makes Git ignore files named .gitignore at any level,
# including this one if it is not already tracked - confirm this is intended.
.gitignore
# NetBeans per-user project settings.
# NOTE(review): already covered by the broader /importer/nbproject/ pattern below.
/importer/nbproject/private/
# Build output directories of the importer module.
/importer/dist/
/importer/build/
/importer/target/
# Local application configuration for the importer; kept out of version control.
/importer/src/main/resources/application.properties
# NetBeans project metadata.
/importer/nbproject/
/importer/nb-configuration.xml
1,045 changes: 1,045 additions & 0 deletions docs/File-Formats.md

Large diffs are not rendered by default.

271 changes: 271 additions & 0 deletions docs/Importer-Workflow.md

Large diffs are not rendered by default.

Binary file added docs/ImporterWorkflowDiagram.pdf
Binary file not shown.
135 changes: 135 additions & 0 deletions importer/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the "importer" module (cBioPortal Importer Pipeline). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<name>cBioPortal Importer Pipeline</name>
<description>Spring Batch importer pipeline</description>
<artifactId>importer</artifactId>
<version>0.1.0</version>

<!-- No groupId is declared for this module, so it is inherited from this parent. -->
<parent>
<groupId>org.cbio.portal.pipelines</groupId>
<artifactId>master</artifactId>
<version>0.1.0</version>
</parent>


<!-- Dependencies without a <version> here (spring-web, commons-collections, spring-jdbc,
commons-dbcp, mysql-connector-java) presumably get it from the parent POM's
dependency management - TODO confirm against the "master" POM. -->
<dependencies>
<!-- cBioPortal model and JDBC persistence artifacts, pinned to 0.1.0;
resolved from the repositories declared at the bottom of this file. -->
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>model</artifactId>
<version>0.1.0</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>persistence-jdbc</artifactId>
<version>0.1.0</version>
<type>jar</type>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.4</version>
</dependency>
<!-- Command line option parsing (org.apache.commons.cli) for the pipeline entry point. -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<type>jar</type>
</dependency>

<!-- for Spring JDBC, BasicDataSource usage -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<type>jar</type>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<type>jar</type>
</dependency>
<!-- MySQL JDBC driver. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
<type>jar</type>
</dependency>
</dependencies>


<build>
<!-- NOTE: the plugin list below is commented out, so none of these plugins is configured
by this POM. The author's original remark on it was: "this plugin will allow us to
share resources with children". -->
<!-- <plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.6</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.4</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version>
</plugin>
</plugins>-->

<!-- Pins the compiler plugin version and sets Java 1.8 as both source and target level. -->
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<!-- Extra artifact repositories served from raw GitHub URLs of the "persistence" project.
Each one enables snapshots and checks for updates on every build. -->
<repositories>
<repository>
<id>model-mvn-repo</id>
<url>https://raw.github.com/angelicaochoa/persistence/model-0.1.0-mvn-repo/</url>
<snapshots>
<enabled>true</enabled>
<updatePolicy>always</updatePolicy>
</snapshots>
</repository>
<!-- NOTE(review): no persistence-mybatis dependency is declared directly in this POM;
presumably this repository is needed for a transitive dependency - TODO confirm. -->
<repository>
<id>persistence-mybatis-mvn-repo</id>
<url>https://raw.github.com/angelicaochoa/persistence/persistence-mybatis-0.1.0-mvn-repo/</url>
<snapshots>
<enabled>true</enabled>
<updatePolicy>always</updatePolicy>
</snapshots>
</repository>
<repository>
<id>persistence-jdbc-mvn-repo</id>
<url>https://raw.github.com/angelicaochoa/persistence/persistence-jdbc-0.1.0-mvn-repo/</url>
<snapshots>
<enabled>true</enabled>
<updatePolicy>always</updatePolicy>
</snapshots>
</repository>
</repositories>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
* is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
* obligations to provide maintenance, support, updates, enhancements or
* modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
* liable to any party for direct, indirect, special, incidental or
* consequential damages, including lost profits, arising out of the use of this
* software and its documentation, even if Memorial Sloan-Kettering Cancer
* Center has been advised of the possibility of such damage.
*/

/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.cbio.portal.pipelines.importer;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change all packages to be consistent with the primary portal project. I would also shorten it: org.cbioportal.importer.


import org.cbio.portal.pipelines.importer.config.BatchConfiguration;

import java.io.*;
import java.util.*;
import org.apache.commons.cli.*;
import org.apache.commons.logging.*;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.batch.core.*;
import org.springframework.batch.core.launch.JobLauncher;

/**
*
* @author ochoaa
*/
@SpringBootApplication
public class ImporterPipeline {

private static final Log LOG = LogFactory.getLog(ImporterPipeline.class);

/**
 * Builds the command line options accepted by the importer pipeline.
 *
 * @param args the raw command line arguments; not consulted while building the options
 * @return options for help (-h), importing a study (-i) and deleting a study (-d)
 */
private static Options getOptions(String[] args) {
    Options cliOptions = new Options();
    // -h is a flag; -i and -d each require an argument value.
    cliOptions.addOption("h", "help", false, "shows this help document and quits.");
    cliOptions.addOption("i", "import_study", true, "Cancer study directory to import");
    cliOptions.addOption("d", "delete_study", true, "Cancer study identifier for deleting study");
    return cliOptions;
}

/**
 * Prints the usage text for the given options and terminates the JVM.
 *
 * @param gnuOptions the options to describe in the usage text
 * @param exitStatus the status code passed to {@link System#exit(int)}
 */
private static void help(Options gnuOptions, int exitStatus) {
    new HelpFormatter().printHelp("ImporterPipeline", gnuOptions);
    // This call does not return; callers cannot rely on code placed after help(...).
    System.exit(exitStatus);
}

private static void launchImporterJob(String[] args, String stagingDirectory) throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets remove all references to stagingDirectory - replace with studyDirectory.


SpringApplication app = new SpringApplication(ImporterPipeline.class);
ConfigurableApplicationContext ctx = app.run(args);
JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);

Job batchImporterJob = ctx.getBean(BatchConfiguration.BATCH_STUDY_IMPORTER_JOB, Job.class);

JobParameters jobParameters = new JobParametersBuilder()
.addString("stagingDirectory", stagingDirectory)
.addDate("date", new Date())
.toJobParameters();

JobExecution jobExecution = jobLauncher.run(batchImporterJob, jobParameters);
if (jobExecution.getExitStatus().getExitCode().equals("STOPPED")) {
LOG.error("Error importing cancer study.");
}
ctx.close();
}

private static void launchDeleteStudyJob(String[] args, String cancerStudyIdentifier) throws Exception {
SpringApplication app = new SpringApplication(ImporterPipeline.class);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor formatting issue - none of the other methods in this class have a space following the method signature.

ConfigurableApplicationContext ctx = app.run(args);
JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);

Job deleteStudyJob = ctx.getBean(BatchConfiguration.DELETE_CANCER_STUDY_JOB, Job.class);

JobParameters jobParameters = new JobParametersBuilder()
.addString("cancerStudyIdentifier", cancerStudyIdentifier)
.toJobParameters();

JobExecution jobExecution = jobLauncher.run(deleteStudyJob, jobParameters);
ctx.close();
}

public static void main(String[] args) throws Exception {
Options gnuOptions = ImporterPipeline.getOptions(args);
CommandLineParser parser = new GnuParser();
CommandLine commandLine = parser.parse(gnuOptions, args);
if (commandLine.hasOption("h") ||
(!commandLine.hasOption("i") && !commandLine.hasOption("d"))) {
help(gnuOptions, 0);
}

if (commandLine.hasOption("d")) {
String cancerStudyIdentifier = commandLine.getOptionValue("d");
launchDeleteStudyJob(args, cancerStudyIdentifier);
}

if (commandLine.hasOption("i")) {
String stagingDirectory = commandLine.getOptionValue("i");
if (!(new File(stagingDirectory).exists())) {
LOG.error("Staging directory does not exist - please check argument: " + stagingDirectory);
System.exit(2);
}
launchImporterJob(args, stagingDirectory);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should support a directory of subdirectories.

}
}

}
Loading