diff --git a/.gitignore b/.gitignore index 7148592..c0b9bb4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,5 @@ *~ -*.iml -.idea -build/ -/classpath/ -/.gradle /pkg/ -*.gemspec +/tmp/ +/.bundle/ +/Gemfile.lock diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 0000000..7f9ec44 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +jruby-9.1.13.0 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3222364..0000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: java -jdk: - - openjdk7 - - oraclejdk7 - - oraclejdk8 -sudo: false -script: - - ./gradlew --info check - - ./gradlew --info test diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..9784aee --- /dev/null +++ b/Gemfile @@ -0,0 +1,2 @@ +source 'https://rubygems.org/' +gemspec diff --git a/LICENSE.txt b/LICENSE.txt old mode 100755 new mode 100644 diff --git a/README.md b/README.md index 4e2b663..cf32067 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,66 @@ -# Google Spreadsheets output plugin for Embulk [![Build Status](https://travis-ci.org/kataring/embulk-output-google_spreadsheets.svg?branch=master)](https://travis-ci.org/kataring/embulk-output-google_spreadsheets) [![Gem Version](https://badge.fury.io/rb/embulk-output-google_spreadsheets.svg)](http://badge.fury.io/rb/embulk-output-google_spreadsheets) - -Embulk output plugin to load into Google Spreadsheets. 
+# Google Spreadsheets output plugin for Embulk ## Overview * **Plugin type**: output * **Load all or nothing**: no * **Resume supported**: no -* **Cleanup supported**: yes +* **Cleanup supported**: no -## Usage +## Configuration -### Install plugin +- auth_method (string, default: 'authorized_user'): 'authorized_user' or 'service_account' +- json_keyfile (string, required): credential file path or `content` string +- spreadsheets_url (string, required): your spreadsheet's url +- worksheet_title (string, required): worksheet's title +- mode (string, default: append): record writing method; available modes are `append` and `replace` +- header_line (bool, default: false): if true, write the header line as the first record +- start_row (integer, default: 1): specify the start row +- start_column (integer, default: 1): specify the start column +- null_string (string, default: ''): replace null with `null_string` +- default_timezone (string, default: '+00:00'): time zone offset of timestamp columns +- default_timestamp_format (string, default: '%Y-%m-%d %H:%M:%S.%6N %z'): output format of timestamp columns -``` -embulk gem install embulk-output-google_spreadsheets -``` +**json_keyfile** +specify the credential file that the Google Developer Console provides for authorization. +https://console.developers.google.com/apis/credentials +if you use OAuth, the credential JSON file must include the `refresh_token` field. 
-## Configuration - -- **service_account_email**: Your Google service account email (string, required) -- **p12_keyfile**: Fullpath of private key in P12(PKCS12) format (string, required) -- **spreadsheet_id**: Your spreadsheet id (string, required) -- **sheet_index**: sheet index (int, optional default: 0) -- **application_name**: Anything you like (string, optional defaulf: "Embulk-GoogleSpreadsheets-OutputPlugin") +``` +{ + "client_id": "******************************************************", + "client_secret": "***************************", + "refresh_token": "***************************" +} +``` ## Example -```yaml +``` out: type: google_spreadsheets - service_account_email: 'XXXXXXXXXXXXXXXXXXXXXXXX@developer.gserviceaccount.com' - p12_keyfile: '/tmp/embulk.p12' - spreadsheet_id: '1RPXaB85DXM7sGlpFYIcpoD2GWFpktgh0jBHlF4m1a0A' + json_keyfile: + content: | + { + "client_id": "******************************************************", + "client_secret": "***************************", + "refresh_token": "***************************" + } + # json_keyfile: './keyfile.json' + spreadsheets_url: 'https://docs.google.com/spreadsheets/d/16RSM_xj5ZB4rz0WBlnIbD1KHO46KASnAY04e_oYUSEE/edit' + worksheet_title: 'シート1' + start_row: 1 + start_column: 1 + header_line: true + null_string : '(NULL)' + default_timezone: '+09:00' + default_timestamp_format: "%Y-%m-%d %H:%M:%S %z" ``` ## Build ``` -$ ./gradlew gem +$ rake ``` diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..8bfbf16 --- /dev/null +++ b/Rakefile @@ -0,0 +1,3 @@ +require "bundler/gem_tasks" + +task default: :build diff --git a/build.gradle b/build.gradle deleted file mode 100644 index 41d4791..0000000 --- a/build.gradle +++ /dev/null @@ -1,76 +0,0 @@ -plugins { - id "com.jfrog.bintray" version "1.1" - id "com.github.jruby-gradle.base" version "0.1.5" - id "java" -} -import com.github.jrubygradle.JRubyExec -repositories { - mavenCentral() - jcenter() -} -configurations { - provided -} - -version = 
"0.2.0" - -dependencies { - compile "org.embulk:embulk-core:0.7.4" - provided "org.embulk:embulk-core:0.7.4" - testCompile "junit:junit:4.+" - - compile 'com.google.api-client:google-api-client:1.19.1' - compile 'com.google.gdata:core:1.47.1' - compile 'com.google.apis:google-api-services-drive:v2-rev184-1.20.0' -} - -task classpath(type: Copy, dependsOn: ["jar"]) { - doFirst { file("classpath").deleteDir() } - from (configurations.runtime - configurations.provided + files(jar.archivePath)) - into "classpath" -} -clean { delete "classpath" } - -task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) { - jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build" - script "${project.name}.gemspec" - doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") } -} - -task gemPush(type: JRubyExec, dependsOn: ["gem"]) { - jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push" - script "pkg/${project.name}-${project.version}.gem" -} - -task "package"(dependsOn: ["gemspec", "classpath"]) << { - println "> Build succeeded." - println "> You can run embulk with '-L ${file(".").absolutePath}' argument." -} - -task gemspec { - ext.gemspecFile = file("${project.name}.gemspec") - inputs.file "build.gradle" - outputs.file gemspecFile - doLast { gemspecFile.write($/ -Gem::Specification.new do |spec| - spec.name = "${project.name}" - spec.version = "${project.version}" - spec.authors = ["Noriaki Katayama"] - spec.summary = %[Google Spreadsheets output plugin for Embulk] - spec.description = %[Dumps records to Google Spreadsheets.] 
- spec.email = ["kataring@gmail.com"] - spec.licenses = ["MIT"] - spec.homepage = "https://github.com/kataring/embulk-output-google_spreadsheets" - - spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - spec.test_files = spec.files.grep(%r"^(test|spec)/") - spec.require_paths = ["lib"] - - #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION'] - spec.add_development_dependency 'bundler', ['~> 1.0'] - spec.add_development_dependency 'rake', ['>= 10.0'] -end -/$) - } -} -clean { delete "${project.name}.gemspec" } diff --git a/embulk-output-google_spreadsheets.gemspec b/embulk-output-google_spreadsheets.gemspec new file mode 100644 index 0000000..90c51b7 --- /dev/null +++ b/embulk-output-google_spreadsheets.gemspec @@ -0,0 +1,20 @@ +Gem::Specification.new do |spec| + spec.name = "embulk-output-google_spreadsheets" + spec.version = "0.1.0" + spec.authors = ["potato2003"] + spec.summary = "Google Spreadsheets output plugin for Embulk" + spec.description = "Dumps records to Google Spreadsheets." 
+ spec.email = ["potato2003@gmail.com"] + spec.licenses = ["MIT"] + # TODO set this: spec.homepage = "https://github.com/ryuji.ito/embulk-output-google_spreadsheets" + + spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] + spec.test_files = spec.files.grep(%r{^(test|spec)/}) + spec.require_paths = ["lib"] + + #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION'] + spec.add_development_dependency 'embulk', ['>= 0.8.35'] + spec.add_development_dependency 'bundler', ['>= 1.10.6'] + spec.add_development_dependency 'rake', ['>= 10.0'] + spec.add_dependency 'google-api-client' +end diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100755 index c97a8bd..0000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100755 index 06b273e..0000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,6 +0,0 @@ -#Tue Aug 11 00:26:20 PDT 2015 -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip diff --git a/gradlew b/gradlew deleted file mode 100755 index 91a7e26..0000000 --- a/gradlew +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env bash - -############################################################################## -## -## Gradle start up script for UN*X -## -############################################################################## - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" - -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Use the maximum available, or set MAX_FD != -1 to use that value. 
-MAX_FD="maximum" - -warn ( ) { - echo "$*" -} - -die ( ) { - echo - echo "$*" - echo - exit 1 -} - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; -esac - -# For Cygwin, ensure paths are in UNIX format before anything is touched. -if $cygwin ; then - [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` -fi - -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >&- -APP_HOME="`pwd -P`" -cd "$SAVED" >&- - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." -fi - -# Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? 
-ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=$((i+1)) - done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac -fi - -# Split up the JVM_OPTS 
And GRADLE_OPTS values into an array, following the shell quoting and substitution rules -function splitJvmOpts() { - JVM_OPTS=("$@") -} -eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS -JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" - -exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" diff --git a/gradlew.bat b/gradlew.bat deleted file mode 100755 index aec9973..0000000 --- a/gradlew.bat +++ /dev/null @@ -1,90 +0,0 @@ -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto init - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. 
- -goto fail - -:init -@rem Get command-line arguments, handling Windowz variants - -if not "%OS%" == "Windows_NT" goto win9xME_args -if "%@eval[2+2]" == "4" goto 4NT_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* -goto execute - -:4NT_args -@rem Get arguments from the 4NT Shell from JP Software -set CMD_LINE_ARGS=%$ - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/lib/embulk/output/google_spreadsheets.rb b/lib/embulk/output/google_spreadsheets.rb index 65b1055..f21dbe3 100644 --- a/lib/embulk/output/google_spreadsheets.rb +++ b/lib/embulk/output/google_spreadsheets.rb @@ -1,3 +1,149 @@ -Embulk::JavaPlugin.register_output( - "google_spreadsheets", "org.embulk.output.google_spreadsheets.GoogleSpreadsheetsOutputPlugin", - File.expand_path('../../../../classpath', __FILE__)) +# frozen_string_literal: true + +require 'googleauth' +require 'google/apis/sheets_v4' + +require_relative 'google_spreadsheets/write_buffer' +require_relative 'google_spreadsheets/spreadsheets_client' + +module Embulk + module Output + + class GoogleSpreadsheets < OutputPlugin + Plugin.register_output("google_spreadsheets", self) + + # support config by file path or content which supported by org.embulk.spi.unit.LocalFile + # json_keyfile: + # content: | + class 
LocalFile + # return JSON string + def self.load(v) + if v.is_a?(String) + File.read(v) + elsif v.is_a?(Hash) + v['content'] + end + end + end + + def self.transaction(config, schema, count, &control) + # configuration code: + task = { + # required + "json_keyfile" => config.param("json_keyfile", LocalFile, nil), + "spreadsheets_url" => config.param("spreadsheets_url", :string), + "worksheet_title" => config.param("worksheet_title", :string), + + # optional + "auth_method" => config.param("auth_method", :string, default: "authorized_user"), # 'auth_method' or 'service_account' + "mode" => config.param("mode", :string, default: "append"), # `replace` or `append` + "header_line" => config.param("header_line", :bool, default: false), + "start_column" => config.param("start_column", :integer, default: 1), + "start_row" => config.param("start_row", :integer, default: 1), + "null_string" => config.param("null_string", :string, default: ""), + "default_timezone" => config.param("default_timezone", :string, default: "+00:00"), + "default_timestamp_format" => config.param("default_timestamp_format", :string, default: "%Y-%m-%d %H:%M:%S.%6N %z"), + } + + # + # prepare to write records + # + + # support 'UTC' specially + task["default_timezone"] = "+00:00" if task["default_timezone"] == 'UTC' + + client = SpreadsheetsClient.new(task, schema) + + mode = task["mode"].to_sym + raise "unsupported mode: #{mode.inspect}" unless [:append, :replace].include? 
mode + + if mode == :replace + client.clear_records + end + + if task['header_line'] + client.write_header_line + end + + Embulk.logger.info("set task: #{task.inspect}") + + task_reports = yield(task) + next_config_diff = {} + return next_config_diff + end + + def init + @mode = task["mode"].to_sym + @null_string = task["null_string"] + + @client = SpreadsheetsClient.new(task, schema) + init_cursor @client + @buffer = WriteBuffer.new + end + + def close + end + + def add(page) + num_records = 0 + + page.each do |values| + record = schema.names.zip(schema.types, values) + + f = record.map do |(name, type, value)| + format(type, value) + end + + @buffer.write_record(f) + num_records += 1 + end + + Embulk.logger.info("buffering #{num_records} records") + end + + def finish + total = 0 + + @buffer.each_slice(1000) do |chunked_records| + Embulk.logger.debug { "flush buffer: write %d records to spreadsheet" % chunked_records.length } + total += chunked_records.length + + @client.write_records(chunked_records) + end + + Embulk.logger.info("finish to write total %d records" % total) + + @buffer.close + end + + def abort + @buffer.close + end + + def commit + task_report = {} + return task_report + end + + def init_cursor(client) + client.set_cursor_to_last_row + end + + def format(type, v) + return @null_string if v.nil? 
+ + case type + when :timestamp + zone_offset = task['default_timezone'] + format = task['default_timestamp_format'] + + v.dup.localtime(zone_offset).strftime(format) + when :json + v.to_json + else + v + end + end + end + end +end diff --git a/lib/embulk/output/google_spreadsheets/spreadsheets_client.rb b/lib/embulk/output/google_spreadsheets/spreadsheets_client.rb new file mode 100644 index 0000000..c622d4d --- /dev/null +++ b/lib/embulk/output/google_spreadsheets/spreadsheets_client.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +module Embulk + module Output + class GoogleSpreadsheets < OutputPlugin + + class SpreadsheetsClient + + DEFAULT_SCOPE = [ + Google::Apis::SheetsV4::AUTH_SPREADSHEETS + ] + + attr_reader :service, :spreadsheets_url, :spreadsheets_id, :worksheet_title, :schema, :task, :cursor + + def initialize(task, schema) + @task = task + @schema = schema + + @spreadsheets_url = task['spreadsheets_url'] + @spreadsheets_id = spreadsheets_id_by_url(@spreadsheets_url) + if @spreadsheets_id.nil? + raise ArgumentError, "failed to extract spreadsheet's id from url, maybe given spreadsheets_url is invalid." + end + + @worksheet_title = task['worksheet_title'] + + @service = Google::Apis::SheetsV4::SheetsService.new + @service.client_options.application_name = 'embulk-output-google_spreadsheets' + @service.authorization = make_credentials(task) + + @cursor = 0 + end + + def replace_mode? + task['mode'] == 'replace' + end + + def append_mode? 
+ task['mode'] == 'append' + end + + def write_header_line + vr = ensure_value_range(headers) + service.update_spreadsheet_value spreadsheets_id, range, vr, value_input_option: 'RAW' + end + + def write_records(records) + vr = ensure_value_range(records) + service.append_spreadsheet_value spreadsheets_id, next_range(records), vr, value_input_option: 'RAW' + end + + def append(records) + vr = ensure_value_range(records) + service.append_spreadsheet_value spreadsheets_id, all_records_range, vr, value_input_option: 'RAW' + end + + def set_cursor_to_last_row + res = service.get_spreadsheet_values spreadsheets_id, all_records_range + @cursor = res.values&.size || 0 + + Embulk.logger.info { "fetched existing #{@cursor} records (including header line), set next records position at #{@cursor + task['start_row']}" } + end + + def clear_records + service.clear_values spreadsheets_id, all_records_range + end + + def headers + schema.names + end + + private + def make_credentials(task) + key = StringIO.new(JSON.parse(task['json_keyfile']).to_json) + + case task['auth_method'] + when 'authorized_user' + Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: DEFAULT_SCOPE) + when 'service_account' + Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: DEFAULT_SCOPE) + else + raise ConfigError.new("Unknown auth method: #{task['auth_method']}") + end + end + + def spreadsheets_id_by_url(url) + base_url = 'https://docs.google.com/spreadsheets/d/' + url.scan(%r{#{base_url}([^/]+)}).dig(0, 0) # return spreadsheets id if matche succeed, nil otherwise + end + + def range + c = col_num_to_a1 task["start_column"] + r = task["start_row"] + + "#{safe_worksheet_title}!#{c}#{r}" + end + + def next_range(records) + c = col_num_to_a1 task["start_column"] + r = task["start_row"] + cursor + @cursor += records.length + + "#{safe_worksheet_title}!#{c}#{r}" + end + + def all_records_range + end_col = col_num_to_a1(task["start_column"] + headers.length - 1) + 
"#{range}:#{end_col}" + end + + # retrun a worksheet title safe for spreadsheet api + def safe_worksheet_title + @safe_worksheet_title ||= begin + title_safe = worksheet_title.gsub("'", "''") # escape signle quote + "'#{title_safe}'" # surround title with single quotes + end + end + + # ensure given data type as value range + # see: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values#ValueRange + # + # { + # major_dimension: String // DIMENSION_UNSPECIFIED, ROWS or COLUMNS + # values: Array[Array[Object]] + # } + # + # ValueRange.values is Array>, + # outer array representing list of rows and + # iner array representing list of columns in row. + def ensure_value_range(arr_or_obj) + { + major_dimension: "ROWS", + values: ensure_2dimension_array(arr_or_obj) + } + end + + def ensure_2dimension_array(arr_or_obj) + arr = ensure_array(arr_or_obj) + return [arr] unless arr[0].is_a? Array + + arr + end + + def ensure_array(arr_or_obj) + return [arr_or_obj] unless arr_or_obj.is_a? Array + + arr_or_obj + end + + # convert col number to a1 annotation + # + def col_num_to_a1(col_num) + offset = 'A'.ord + radix = 26 # number of letters in alphabet + + a1 = [] + + while 1 <= col_num do + # adjust col number. + # A1 annotation is base-26 (A-Z) but 0 does not exists. + # + # for example, + # (NA) == 0 base-10 + # A base-26 == 1 base-10 + # Z base-26 == 26 base-10 + # AA base-26 == 27 base-10 (if A is 0, AA also become 0) + # + col_num -= 1 + a1 << (col_num % radix + offset).chr + col_num /= radix + end + + return 'A' if a1.empty? 
+ + a1.reverse.join + end + end + end + end +end diff --git a/lib/embulk/output/google_spreadsheets/write_buffer.rb b/lib/embulk/output/google_spreadsheets/write_buffer.rb new file mode 100644 index 0000000..c019255 --- /dev/null +++ b/lib/embulk/output/google_spreadsheets/write_buffer.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'forwardable' +require 'tempfile' + +module Embulk + module Output + class GoogleSpreadsheets < OutputPlugin + + class WriteBuffer + extend Forwardable + include Enumerable + + attr_reader :file + + def initialize + @file = Tempfile.new(safe_class_name) + end + + def close + return if file.nil? + + file.close + file.unlink + file = nil + end + + def write_record(r) + write(Marshal.dump(r) << "\n") + end + + def each + file.rewind + file.each do |serialized_record| + yield Marshal.restore(serialized_record) + end + end + + def_delegators :@file, :write + + private + # retrun a class name safe for filesystem + def safe_class_name() + self.class.name.downcase.gsub(/[^\d\w]/, '_') + end + + end + end + end +end diff --git a/src/main/java/org/embulk/output/google_spreadsheets/GoogleSpreadsheetsOutputPlugin.java b/src/main/java/org/embulk/output/google_spreadsheets/GoogleSpreadsheetsOutputPlugin.java deleted file mode 100644 index 3a758fa..0000000 --- a/src/main/java/org/embulk/output/google_spreadsheets/GoogleSpreadsheetsOutputPlugin.java +++ /dev/null @@ -1,276 +0,0 @@ -package org.embulk.output.google_spreadsheets; - -import org.embulk.config.TaskReport; -import org.embulk.config.Config; -import org.embulk.config.ConfigDefault; -import org.embulk.config.ConfigDiff; -import org.embulk.config.ConfigSource; -import org.embulk.config.Task; -import org.embulk.config.TaskSource; -import org.embulk.spi.Exec; -import org.embulk.spi.OutputPlugin; -import org.embulk.spi.Schema; -import org.embulk.spi.TransactionalPageOutput; -import org.embulk.spi.Page; -import org.embulk.spi.Column; -import org.embulk.spi.ColumnVisitor; -import 
org.embulk.spi.PageReader; -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; -import com.google.api.client.json.jackson2.JacksonFactory; -import com.google.gdata.client.spreadsheet.SpreadsheetService; -import com.google.gdata.data.spreadsheet.*; -import com.google.gdata.util.ServiceException; -import com.google.api.services.drive.DriveScopes; -import com.google.api.client.json.JsonFactory; -import com.google.api.client.http.HttpTransport; - -import java.io.IOException; -import java.io.File; -import java.net.URL; -import java.util.List; -import java.util.Arrays; -import org.embulk.spi.time.Timestamp; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.slf4j.Logger; -import java.security.GeneralSecurityException; - -public class GoogleSpreadsheetsOutputPlugin - implements OutputPlugin -{ - public interface PluginTask - extends Task - { - @Config("service_account_email") - public String getServiceAccountEmail(); - - @Config("spreadsheet_id") - public String getSpreadsheetId(); - - @Config("p12_keyfile") - public String getP12Keyfile(); - - @Config("sheet_index") - @ConfigDefault("0") - public int getSheetIndex(); - - @Config("application_name") - @ConfigDefault("\"Embulk-GoogleSpreadsheets-OutputPlugin\"") - public String getApplicationName(); - - @Config("default_timezone") - @ConfigDefault("\"UTC\"") - public String getDefaultTimezone(); - } - - private final Logger log; - - public GoogleSpreadsheetsOutputPlugin() - { - log = Exec.getLogger(getClass()); - } - - @Override - public ConfigDiff transaction(ConfigSource config, - Schema schema, int taskCount, - OutputPlugin.Control control) - { - PluginTask task = config.loadConfig(PluginTask.class); - - // retryable (idempotent) output: - // return resume(task.dump(), schema, taskCount, control); - - // non-retryable (non-idempotent) 
output: - control.run(task.dump()); - return Exec.newConfigDiff(); - } - - @Override - public ConfigDiff resume(TaskSource taskSource, - Schema schema, int taskCount, - OutputPlugin.Control control) - { - throw new UnsupportedOperationException("google_spreadsheets output plugin does not support resuming"); - } - - @Override - public void cleanup(TaskSource taskSource, - Schema schema, int taskCount, - List successTaskReports) - { - } - - private GoogleCredential getServiceAccountCredential(PluginTask task) - throws IOException, GeneralSecurityException - { - HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport(); - JsonFactory jsonFactory = JacksonFactory.getDefaultInstance(); - List scopes = Arrays.asList(DriveScopes.DRIVE, "https://spreadsheets.google.com/feeds"); - - return new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setServiceAccountId(task.getServiceAccountEmail()) - .setServiceAccountPrivateKeyFromP12File(new File(task.getP12Keyfile())) - .setServiceAccountScopes(scopes) - .build(); - } - - @Override - public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex) - { - PluginTask task = taskSource.loadTask(PluginTask.class); - GoogleSpreadsheetsPageOutput pageOutput = new GoogleSpreadsheetsPageOutput(task); - pageOutput.open(schema); - return pageOutput; - } - - public class GoogleSpreadsheetsPageOutput implements TransactionalPageOutput - { - private PageReader pageReader; - private SpreadsheetService service; - private WorksheetEntry worksheet; - private ListEntry row; - private DateTimeFormatter formatter; - - public GoogleSpreadsheetsPageOutput(PluginTask task) { - try { - GoogleCredential credentials = getServiceAccountCredential(task); - service = new SpreadsheetService(task.getApplicationName()); - service.setProtocolVersion(SpreadsheetService.Versions.V3); - service.setOAuth2Credentials(credentials); - - URL entryUrl = new 
URL("https://spreadsheets.google.com/feeds/spreadsheets/" + task.getSpreadsheetId()); - SpreadsheetEntry spreadsheet = service.getEntry(entryUrl, SpreadsheetEntry.class); - - WorksheetFeed worksheetFeed = service.getFeed(spreadsheet.getWorksheetFeedUrl(), WorksheetFeed.class); - List worksheets = worksheetFeed.getEntries(); - worksheet = worksheets.get(task.getSheetIndex()); - - DateTimeZone zone = DateTimeZone.forID(task.getDefaultTimezone()); - formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(zone); - } - catch (ServiceException e) { - throw new RuntimeException(e); - } - catch (IOException e) { - throw new RuntimeException(e); - } - catch (GeneralSecurityException e) { - throw new RuntimeException(e); - } - } - - void open(final Schema schema) - { - pageReader = new PageReader(schema); - } - - @Override - public void add(Page page) - { - log.debug("add: start"); - pageReader.setPage(page); - URL listFeedUrl = worksheet.getListFeedUrl(); - - while (pageReader.nextRecord()) { - row = new ListEntry(); - pageReader.getSchema().visitColumns(new ColumnVisitor() { - @Override - public void booleanColumn(Column column) { - if (pageReader.isNull(column)) { - log.debug("booleanColumn: null"); - row.getCustomElements().setValueLocal(column.getName(), ""); - } else { - log.debug("booleanColumn: " + pageReader.getBoolean(column)); - row.getCustomElements().setValueLocal(column.getName(), (pageReader.getBoolean(column) ? 
"true" : "false")); - } - } - - @Override - public void longColumn(Column column) { - if (pageReader.isNull(column)) { - log.debug("longColumn: null"); - row.getCustomElements().setValueLocal(column.getName(), ""); - } else { - log.debug("longColumn: " + pageReader.getLong(column)); - row.getCustomElements().setValueLocal(column.getName(), Long.toString(pageReader.getLong(column))); - } - } - - @Override - public void doubleColumn(Column column) { - if (pageReader.isNull(column)) { - log.debug("doubleColumn: null"); - row.getCustomElements().setValueLocal(column.getName(), ""); - } else { - log.debug("doubleColumn: " + pageReader.getDouble(column)); - row.getCustomElements().setValueLocal(column.getName(), Double.toString(pageReader.getDouble(column))); - } - } - - @Override - public void stringColumn(Column column) { - if (pageReader.isNull(column)) { - log.debug("stringColumn: null"); - row.getCustomElements().setValueLocal(column.getName(), ""); - } else { - log.debug("stringColumn: " + pageReader.getString(column)); - row.getCustomElements().setValueLocal(column.getName(), pageReader.getString(column)); - } - } - - @Override - public void timestampColumn(Column column) { - if (pageReader.isNull(column)) { - log.debug("timestampColumn: null"); - row.getCustomElements().setValueLocal(column.getName(), ""); - } else { - Timestamp timestamp = pageReader.getTimestamp(column); - String strTimestamp = formatter.print(timestamp.toEpochMilli()); - log.debug("timestampColumn: " + strTimestamp); - row.getCustomElements().setValueLocal(column.getName(), strTimestamp); - } - } - }); - - try { - row = service.insert(listFeedUrl, row); - } - catch (Exception e){ - log.warn("can not insert:" + e.toString()); - } - } - log.debug("add: end"); - } - - @Override - public void finish() - { - try { - // TODO - } finally { - close(); - } - } - - @Override - public void close() - { - // TODO do nothing - } - - @Override - public void abort() - { - // TODO do nothing - } - - @Override 
- public TaskReport commit() - { - return Exec.newTaskReport(); - } - } -} diff --git a/src/test/java/org/embulk/output/google_spreadsheets/TestGoogleSpreadsheetsOutputPlugin.java b/src/test/java/org/embulk/output/google_spreadsheets/TestGoogleSpreadsheetsOutputPlugin.java deleted file mode 100644 index 7ebfd0a..0000000 --- a/src/test/java/org/embulk/output/google_spreadsheets/TestGoogleSpreadsheetsOutputPlugin.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.embulk.output.google_spreadsheets; - -public class TestGoogleSpreadsheetsOutputPlugin -{ -}