diff --git a/.goreleaser.yml b/.goreleaser.yml new file mode 100644 index 0000000..28dd0ba --- /dev/null +++ b/.goreleaser.yml @@ -0,0 +1,106 @@ +# GoReleaser configuration for ClippingKK CLI +version: 2 + +project_name: ck-cli + +before: + hooks: + # Clean up any previous builds + - go mod tidy + # Run tests before building + - go test ./... + +builds: + - main: ./cmd/ck-cli + binary: ck-cli + env: + - CGO_ENABLED=0 + goos: + - linux + - windows + - darwin + goarch: + - amd64 + - arm64 + # Build flags + flags: + - -trimpath + ldflags: + - -s -w + - -X main.Version={{.Version}} + - -X main.Commit={{.ShortCommit}} + # Ignore specific combinations that don't make sense + ignore: + - goos: windows + goarch: arm64 + +archives: + - format: tar.gz + # Use zip for Windows + format_overrides: + - goos: windows + format: zip + name_template: >- + {{ .ProjectName }}_ + {{- title .Os }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + files: + - README.md + - LICENSE + +checksum: + name_template: 'checksums.txt' + +snapshot: + name_template: "{{ incpatch .Version }}-next" + +changelog: + sort: asc + use: github + filters: + exclude: + - '^docs:' + - '^test:' + - '^ci:' + - '^chore:' + - Merge pull request + - Merge branch + groups: + - title: 'New Features' + regexp: '^.*?feat(\(.+\))??!?:.+$' + order: 0 + - title: 'Bug Fixes' + regexp: '^.*?fix(\(.+\))??!?:.+$' + order: 1 + - title: 'Performance Improvements' + regexp: '^.*?perf(\(.+\))??!?:.+$' + order: 2 + - title: 'Refactors' + regexp: '^.*?refactor(\(.+\))??!?:.+$' + order: 3 + - title: 'Others' + order: 999 + +release: + github: + owner: clippingkk + name: cli + draft: false + prerelease: auto + mode: replace + header: | + ## ClippingKK CLI {{.Tag}} + + Parse Amazon Kindle clippings and sync to ClippingKK service. + + ### Installation + + Download the appropriate binary for your platform from the assets below. + + ### What's Changed + footer: | + **Full Changelog**: https://github.com/clippingkk/cli/compare/{{ .PreviousTag }}...{{ .Tag }} + diff --git a/CLAUDE.md b/CLAUDE.md index 2d76d05..dab215b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,81 +4,136 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -ClippingKK CLI (`ck-cli`) is a Rust-based Terminal User Interface tool that parses Amazon Kindle's "My Clippings.txt" file into structured JSON format. It supports synchronization with the ClippingKK web service for cloud storage of reading highlights. +ClippingKK CLI (`ck-cli`) is a Go-based command-line tool that parses Amazon Kindle's "My Clippings.txt" file into structured JSON format. It supports synchronization with the ClippingKK web service for cloud storage of reading highlights. ## Key Commands ### Building and Development ```bash # Standard build -cargo build +make build +# or +go build -o ck-cli ./cmd/ck-cli -# Release build (optimized) -cargo build --release +# Install to GOPATH/bin +make install +# or +go install ./cmd/ck-cli # Run tests -cargo test +make test +# or +go test ./... -# Run benchmarks -cargo bench +# Run tests with coverage +make test-coverage # Format code -cargo fmt +make fmt +# or +go fmt ./... # Lint code -cargo clippy --all-features +make lint +# or +golangci-lint run # Run the CLI -cargo run -- [arguments] +./ck-cli [arguments] +# or +go run ./cmd/ck-cli [arguments] ``` -### Testing +### Cross-Platform Building ```bash -# Run all tests with verbose output -cargo test --verbose --all-features --workspace +# Build for all platforms +make build-all + +# Build for specific platforms +make build-linux +make build-windows +make build-macos + +# Release with GoReleaser +make release-dry # dry run +make release # actual release +``` + +### Testing and Development +```bash +# Run all tests +go test ./... + +# Run tests with verbose output +go test -v ./... # Run specific test -cargo test test_name +go test -run TestName ./internal/parser -# Generate code coverage (requires cargo-tarpaulin) -cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml +# Run benchmarks +make bench +# or +go test -bench=. ./... + +# Test with example data +make run-example +make test-parse-stdin ``` ### Running the Application ```bash # Parse a Kindle clippings file to JSON -cargo run -- parse -i "/path/to/My Clippings.txt" -o "/path/output.json" +ck-cli parse --input "/path/to/My Clippings.txt" --output "/path/output.json" # Parse from stdin to stdout -cat "My Clippings.txt" | cargo run -- parse +cat "My Clippings.txt" | ck-cli parse # Sync to ClippingKK web service (requires login first) -cargo run -- login --token "YOUR_TOKEN" -cargo run -- parse --input "/path/to/My Clippings.txt" --output http +ck-cli login --token "YOUR_TOKEN" +ck-cli parse --input "/path/to/My Clippings.txt" --output http ``` ## Architecture and Code Structure +### Project Structure +``` +cmd/ck-cli/ # Main CLI application entry point +internal/ +├── commands/ # CLI command implementations (login, parse) +├── config/ # Configuration management (TOML files) +├── http/ # HTTP client and GraphQL integration +├── models/ # Data models (ClippingItem, etc.) +└── parser/ # Kindle clippings parser (core logic) +``` + ### Core Components -1. **Parser Module (`src/parser.rs`)**: The heart of the application - - Handles multi-language parsing (Chinese, English, Japanese) - - Uses regex patterns to extract clipping components - - Converts Kindle's format to `TClippingItem` structs - - Key struct: `TClippingItem` with fields: `title`, `content`, `pageAt`, `createdAt` +1. **Main CLI (`cmd/ck-cli/main.go`)**: Application entry point + - Uses `urfave/cli/v2` framework for command structure + - Handles graceful shutdown with context + - Version and build info injection + +2. **Parser Module (`internal/parser/parser.go`)**: The heart of the application + - Handles multi-language parsing (Chinese, English) + - Uses regex patterns to extract clipping components + - Converts Kindle's format to `ClippingItem` structs + - Key struct: `ClippingItem` with fields: `Title`, `Content`, `PageAt`, `CreatedAt` -2. **HTTP/GraphQL Integration (`src/http.rs`, `src/graphql.rs`)**: +3. **HTTP/GraphQL Integration (`internal/http/client.go`)**: - Syncs parsed clippings to ClippingKK web service - Uses GraphQL mutations for data upload - - Handles authentication via Bearer tokens + - Handles chunked uploads with concurrency control + - Proper error handling and retry logic -3. **Configuration (`src/config.rs`)**: +4. **Configuration (`internal/config/config.go`)**: - Manages `.ck-cli.toml` in user's home directory - Stores HTTP endpoint and authentication headers + - TOML format with `pelletier/go-toml/v2` -4. **Authentication (`src/auth.rs`)**: - - Interactive login flow with QR code display - - Token management for API access +5. **Commands (`internal/commands/`)**: + - `login.go`: Authentication flow and token management + - `parse.go`: Main parsing and output logic + - Clean separation of CLI logic from business logic ### Data Flow 1. Input: Kindle's "My Clippings.txt" file (UTF-8 encoded) @@ -87,31 +142,66 @@ cargo run -- parse --input "/path/to/My Clippings.txt" --output http ### Key Technical Details -- **Async Runtime**: Uses Tokio for async operations -- **Error Handling**: Returns `Result>` for main operations -- **Date Parsing**: Handles multiple date formats across languages using chrono +- **CLI Framework**: Uses `urfave/cli/v2` for robust command handling +- **HTTP Client**: Custom HTTP client with proper context handling +- **Concurrency**: Controlled concurrent uploads with semaphores +- **Error Handling**: Structured error handling with context +- **Date Parsing**: Handles multiple date formats across languages - **Regex Patterns**: Language-specific patterns for parsing clipping headers -- **JSON Serialization**: Uses serde for type-safe JSON handling +- **JSON Serialization**: Standard library JSON with custom marshaling ### Testing Approach -- Unit tests in `tests/tests.rs` validate parsing against fixture files -- Fixtures cover edge cases: multiple languages, special characters, various formats -- Benchmarks in `benches/parse.rs` measure parsing performance using Criterion +- Unit tests in `*_test.go` files alongside source code +- Test fixtures cover edge cases: multiple languages, special characters, various formats +- Table-driven tests for comprehensive coverage +- Integration tests for command-line interface + +### Build and Release + +- **GoReleaser**: Multi-platform builds and releases +- **Docker**: Container builds for easy deployment +- **Package Managers**: Homebrew, APT, RPM, AUR support +- **CI/CD**: GitHub Actions for testing and releases -### CI/CD Workflows +### Dependencies -1. **Code Coverage**: Runs on master branch and PRs, reports to Codecov -2. **Benchmark Comparison**: Compares performance metrics on PRs -3. **Release Builds**: Automated multi-platform builds for releases +- `github.com/urfave/cli/v2`: CLI framework +- `github.com/pelletier/go-toml/v2`: TOML configuration +- Standard library for HTTP, JSON, regex, time handling ### Important Patterns -- The parser uses a state machine approach to handle multi-line clippings -- Language detection is based on regex pattern matching of clipping headers -- HTTP client reuses connections for batch uploads -- Configuration persists between sessions in TOML format +- Clean architecture with internal packages +- Interface-based design for testability +- Context-aware operations for cancellation +- Structured logging and error handling +- Configuration with sensible defaults + +## Development Guidelines + +### Code Style +- Follow standard Go conventions (`gofmt`, `go vet`) +- Use `golangci-lint` for comprehensive linting +- Write tests for all new functionality +- Document exported functions and types + +### Testing +- Write table-driven tests where appropriate +- Include both positive and negative test cases +- Test error conditions and edge cases +- Use test fixtures for parser testing + +### Performance +- Parser optimized for large clipping files +- Concurrent HTTP uploads for better sync performance +- Minimal memory allocation in hot paths ## Commit Guidelines -- The commit message must fit with Conventional Commits rules, and follow scopes (feat, fix, refactor, perf) \ No newline at end of file +- The commit message must fit with Conventional Commits rules +- Use scopes: `feat`, `fix`, `refactor`, `perf`, `test`, `docs`, `build` +- Examples: + - `feat(parser): add support for Japanese clippings` + - `fix(http): handle network timeouts properly` + - `refactor(config): simplify TOML configuration` \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ffe0cd3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,35 @@ +# Build stage +FROM golang:1.21-alpine AS builder + +# Install ca-certificates for SSL +RUN apk add --no-cache ca-certificates git + +# Set working directory +WORKDIR /app + +# Copy go mod files +COPY go.mod go.sum ./ + +# Download dependencies +RUN go mod download + +# Copy source code +COPY . . + +# Build the application +RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags="-s -w" \ + -o ck-cli \ + ./cmd/ck-cli + +# Final stage +FROM scratch + +# Copy ca-certificates from builder +COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ + +# Copy the binary +COPY --from=builder /app/ck-cli /ck-cli + +# Set the entrypoint +ENTRYPOINT ["/ck-cli"] \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..276a6ec --- /dev/null +++ b/Makefile @@ -0,0 +1,155 @@ +# Makefile for ClippingKK CLI (Go version) + +# Variables +BINARY_NAME=ck-cli +CMD_DIR=./cmd/ck-cli +VERSION?=dev +COMMIT?=$(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") +BUILD_FLAGS=-ldflags="-s -w -X main.Version=$(VERSION) -X main.Commit=$(COMMIT)" + +# Default target +.DEFAULT_GOAL := build + +# Development commands +.PHONY: build +build: ## Build the CLI for current platform + go build $(BUILD_FLAGS) -o $(BINARY_NAME) $(CMD_DIR) + +.PHONY: build-release +build-release: ## Build optimized release binary + CGO_ENABLED=0 go build $(BUILD_FLAGS) -trimpath -o $(BINARY_NAME) $(CMD_DIR) + +.PHONY: install +install: ## Install the CLI to GOPATH/bin + go install $(BUILD_FLAGS) $(CMD_DIR) + +.PHONY: clean +clean: ## Remove build artifacts + rm -f $(BINARY_NAME) + go clean + +# Testing +.PHONY: test +test: ## Run tests + go test -v ./... + +.PHONY: test-coverage +test-coverage: ## Run tests with coverage + go test -v -race -coverprofile=coverage.out ./... + go tool cover -html=coverage.out -o coverage.html + +.PHONY: bench +bench: ## Run benchmarks + go test -bench=. -benchmem ./... + +# Code quality +.PHONY: fmt +fmt: ## Format code + go fmt ./... + +.PHONY: vet +vet: ## Run go vet + go vet ./... + +.PHONY: lint +lint: ## Run golangci-lint (requires golangci-lint to be installed) + golangci-lint run + +.PHONY: mod-tidy +mod-tidy: ## Tidy go modules + go mod tidy + +# Development setup +.PHONY: deps +deps: ## Download dependencies + go mod download + +.PHONY: deps-update +deps-update: ## Update dependencies + go get -u ./... + go mod tidy + +# Cross-compilation +.PHONY: build-linux +build-linux: ## Build for Linux + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build $(BUILD_FLAGS) -o $(BINARY_NAME)-linux-amd64 $(CMD_DIR) + +.PHONY: build-windows +build-windows: ## Build for Windows + GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build $(BUILD_FLAGS) -o $(BINARY_NAME)-windows-amd64.exe $(CMD_DIR) + +.PHONY: build-macos +build-macos: ## Build for macOS + GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 go build $(BUILD_FLAGS) -o $(BINARY_NAME)-darwin-amd64 $(CMD_DIR) + +.PHONY: build-all +build-all: build-linux build-windows build-macos ## Build for all platforms + +# Release +.PHONY: release-dry +release-dry: ## Dry run release with goreleaser + goreleaser release --snapshot --rm-dist + +.PHONY: release +release: ## Release with goreleaser + goreleaser release --rm-dist + +# Docker +.PHONY: docker-build +docker-build: ## Build Docker image + docker build -t $(BINARY_NAME):$(VERSION) . + +.PHONY: docker-run +docker-run: ## Run Docker container + docker run --rm -it $(BINARY_NAME):$(VERSION) + +# Examples and testing +.PHONY: run-example +run-example: build ## Run example with test data + @echo "Building and running example..." + @if [ -f "./fixtures/clippings_en.txt" ]; then \ + ./$(BINARY_NAME) parse --input ./fixtures/clippings_en.txt; \ + else \ + echo "No test fixtures found. Create a sample clippings file to test."; \ + fi + +.PHONY: test-parse-stdin +test-parse-stdin: build ## Test parsing from stdin + @echo "Testing stdin parsing..." + @if [ -f "./fixtures/clippings_en.txt" ]; then \ + cat ./fixtures/clippings_en.txt | ./$(BINARY_NAME) parse; \ + else \ + echo "No test fixtures found."; \ + fi + +# Development utilities +.PHONY: dev-setup +dev-setup: deps ## Set up development environment + @echo "Setting up development environment..." + @if ! command -v golangci-lint > /dev/null; then \ + echo "Installing golangci-lint..."; \ + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $$(go env GOPATH)/bin; \ + fi + @if ! command -v goreleaser > /dev/null; then \ + echo "Installing goreleaser..."; \ + go install github.com/goreleaser/goreleaser@latest; \ + fi + +.PHONY: check +check: fmt vet test ## Run all checks (format, vet, test) + +.PHONY: all +all: clean deps check build ## Clean, download deps, run checks, and build + +# Help +.PHONY: help +help: ## Show this help message + @echo "ClippingKK CLI (Go) - Available commands:" + @echo "" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + @echo "" + @echo "Examples:" + @echo " make build # Build for current platform" + @echo " make test # Run tests" + @echo " make build-all # Build for all platforms" + @echo " make run-example # Build and test with sample data" \ No newline at end of file diff --git a/README.md b/README.md index 0731132..ce9fcd0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,50 @@ # CK-CLI [![codecov](https://codecov.io/gh/clippingkk/cli/branch/master/graph/badge.svg?token=68N24T6T9P)](https://codecov.io/gh/clippingkk/cli) -`ck-cli`(clippingkk-cli) is a TUI(Terminal User Interface) to parse `My Clippings.txt` that clippings in Amazon Kindle to user friendly data struct. +`ck-cli` (ClippingKK CLI) is a high-performance command-line tool written in Go that parses Amazon Kindle's `My Clippings.txt` file into structured JSON format and syncs highlights to the ClippingKK web service. [![video guide](http://img.youtube.com/vi/y4pgU9zIpxA/0.jpg)](http://www.youtube.com/watch?v=y4pgU9zIpxA "ClippingKK 命令行工具上传使用") ## Installation -download latest version from [release page](https://github.com/clippingkk/cli/releases) and add to `$PATH` +### Homebrew (macOS/Linux) +```bash +brew install clippingkk/ck-cli/ck-cli +``` + +### Direct Download +Download the latest version from the [release page](https://github.com/clippingkk/cli/releases) and add to your `$PATH`. + +### Package Managers + +**Debian/Ubuntu:** +```bash +# Download .deb package from releases page +sudo dpkg -i ck-cli_*_linux_amd64.deb +``` + +**Red Hat/CentOS/Fedora:** +```bash +# Download .rpm package from releases page +sudo rpm -i ck-cli_*_linux_amd64.rpm +``` + +**Arch Linux:** +```bash +yay -S ck-cli-bin +``` + +### Go Install +If you have Go installed: +```bash +go install github.com/clippingkk/cli/cmd/ck-cli@latest +``` + +### Docker +```bash +docker pull ghcr.io/clippingkk/ck-cli:latest +# Use with volume mount for file access +docker run --rm -v $(pwd):/data ghcr.io/clippingkk/ck-cli:latest parse --input /data/My\ Clippings.txt +``` ## Usage @@ -68,6 +106,51 @@ you can manually define where should it send and the http request headers by edi If you want integration with CI service, you can set config as secret. and to do something you want +## Development + +### Prerequisites +- Go 1.21 or later +- Make (optional, for convenience) + +### Building from Source +```bash +# Clone the repository +git clone https://github.com/clippingkk/cli.git +cd cli + +# Build the CLI +make build +# or +go build -o ck-cli ./cmd/ck-cli + +# Run tests +make test +# or +go test ./... +``` + +### Development Commands +```bash +make help # Show all available commands +make build # Build for current platform +make test # Run tests +make test-coverage # Run tests with coverage +make lint # Run linter +make build-all # Cross-compile for all platforms +make dev-setup # Install development dependencies +``` + +### Project Structure +``` +cmd/ck-cli/ # Main CLI application +internal/ +├── commands/ # CLI command implementations +├── config/ # Configuration management +├── http/ # HTTP client and GraphQL integration +├── models/ # Data models +└── parser/ # Kindle clippings parser +``` + ## Contributing Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. diff --git a/cmd/ck-cli/main.go b/cmd/ck-cli/main.go new file mode 100644 index 0000000..5de51cb --- /dev/null +++ b/cmd/ck-cli/main.go @@ -0,0 +1,74 @@ +package main + +import ( + "context" + "fmt" + "log" + "os" + "os/signal" + "syscall" + + "github.com/clippingkk/cli/internal/commands" + "github.com/urfave/cli/v2" +) + +var ( + // Version is set at build time + Version = "dev" + // Commit is set at build time + Commit = "unknown" +) + +func main() { + // Handle graceful shutdown + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle interruption signals + go func() { + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + <-sigChan + cancel() + }() + + app := &cli.App{ + Name: "ck-cli", + Usage: "Parse Amazon Kindle clippings and sync to ClippingKK service", + Version: fmt.Sprintf("%s (%s)", Version, Commit), + Description: "ClippingKK CLI tool for parsing Kindle's My Clippings.txt file into structured JSON format and syncing with ClippingKK web service.", + Authors: []*cli.Author{ + { + Name: "Annatar He", + Email: "annatar.he+ck.cli@gmail.com", + }, + }, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "config", + Aliases: []string{"c"}, + Usage: "Path to configuration file", + Value: "", + }, + &cli.StringFlag{ + Name: "token", + Aliases: []string{"t"}, + Usage: "Authentication token for ClippingKK service", + Value: "", + }, + }, + Commands: []*cli.Command{ + commands.LoginCommand, + commands.ParseCommand, + }, + Before: func(c *cli.Context) error { + // Inject global configuration context + commands.SetContext(ctx) + return nil + }, + } + + if err := app.RunContext(ctx, os.Args); err != nil { + log.Fatal(err) + } +} \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..07f47de --- /dev/null +++ b/go.mod @@ -0,0 +1,14 @@ +module github.com/clippingkk/cli + +go 1.21 + +require ( + github.com/pelletier/go-toml/v2 v2.1.1 + github.com/urfave/cli/v2 v2.27.1 +) + +require ( + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect +) \ No newline at end of file diff --git a/internal/commands/context.go b/internal/commands/context.go new file mode 100644 index 0000000..d6e8c69 --- /dev/null +++ b/internal/commands/context.go @@ -0,0 +1,28 @@ +package commands + +import ( + "context" + "sync" +) + +var ( + globalContext context.Context + contextMutex sync.RWMutex +) + +// SetContext sets the global context for commands +func SetContext(ctx context.Context) { + contextMutex.Lock() + defer contextMutex.Unlock() + globalContext = ctx +} + +// GetContext returns the global context +func GetContext() context.Context { + contextMutex.RLock() + defer contextMutex.RUnlock() + if globalContext == nil { + return context.Background() + } + return globalContext +} \ No newline at end of file diff --git a/internal/commands/login.go b/internal/commands/login.go new file mode 100644 index 0000000..10d32d0 --- /dev/null +++ b/internal/commands/login.go @@ -0,0 +1,75 @@ +package commands + +import ( + "fmt" + "os" + + "github.com/clippingkk/cli/internal/config" + "github.com/urfave/cli/v2" +) + +// LoginCommand handles user authentication +var LoginCommand = &cli.Command{ + Name: "login", + Usage: "Authenticate with ClippingKK service", + Description: `Login to ClippingKK service using your API token. + +Visit https://clippingkk.annatarhe.com, login to your account, +navigate to your profile page and open the 'API Token' dialog. +Copy the token and use it with this command. + +Example: + ck-cli login --token YOUR_API_TOKEN + ck-cli --token YOUR_API_TOKEN login`, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "token", + Aliases: []string{"t"}, + Usage: "API token from ClippingKK profile page", + Required: false, + }, + }, + Action: loginAction, +} + +func loginAction(c *cli.Context) error { + // Get token from flag or global flag + token := c.String("token") + if token == "" { + token = c.String("token") // Try global flag + } + + if token == "" { + fmt.Fprintf(os.Stderr, "❌ Token not found\n\n") + fmt.Fprintf(os.Stderr, "Visit https://clippingkk.annatarhe.com and login\n") + fmt.Fprintf(os.Stderr, "Then navigate to your profile page and open 'API Token' dialog.\n") + fmt.Fprintf(os.Stderr, "Copy the token and run:\n") + fmt.Fprintf(os.Stderr, " ck-cli login --token YOUR_TOKEN\n\n") + os.Exit(1) + } + + // Load or create config + configPath, err := config.GetConfigPath(c.String("config")) + if err != nil { + return fmt.Errorf("failed to get config path: %w", err) + } + + cfg, err := config.Load(configPath) + if err != nil { + return fmt.Errorf("failed to load config: %w", err) + } + + // Update token + cfg.UpdateToken(token) + + // Save config + if err := cfg.Save(configPath); err != nil { + return fmt.Errorf("failed to save config: %w", err) + } + + fmt.Printf("✅ Successfully logged in!\n\n") + fmt.Printf("You can now synchronize your Kindle clippings by running:\n") + fmt.Printf(" ck-cli parse --input /path/to/My\\ Clippings.txt --output http\n\n") + + return nil +} \ No newline at end of file diff --git a/internal/commands/parse.go b/internal/commands/parse.go new file mode 100644 index 0000000..c5c9ded --- /dev/null +++ b/internal/commands/parse.go @@ -0,0 +1,217 @@ +package commands + +import ( + "encoding/json" + "fmt" + "io" + "os" + "strings" + + "github.com/clippingkk/cli/internal/config" + "github.com/clippingkk/cli/internal/http" + "github.com/clippingkk/cli/internal/parser" + "github.com/urfave/cli/v2" +) + +// ParseCommand handles parsing Kindle clippings +var ParseCommand = &cli.Command{ + Name: "parse", + Usage: "Parse Kindle clippings file and output structured data", + Description: `Parse Amazon Kindle's "My Clippings.txt" file into structured JSON format. + +The command can read from: +- A file specified with --input +- Standard input (stdin) if no input is specified + +Output options: +- Standard output (stdout) if no output is specified +- A file specified with --output filename +- ClippingKK web service if --output is "http" or an HTTP URL + +Examples: + # Parse file to stdout + ck-cli parse --input "My Clippings.txt" + + # Parse from stdin to stdout + cat "My Clippings.txt" | ck-cli parse + + # Parse file to JSON file + ck-cli parse --input "My Clippings.txt" --output clippings.json + + # Parse and sync to ClippingKK service + ck-cli parse --input "My Clippings.txt" --output http`, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "input", + Aliases: []string{"i"}, + Usage: "Path to Kindle clippings file (default: read from stdin)", + Value: "", + }, + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o"}, + Usage: "Output destination: file path, 'http' for ClippingKK sync, or empty for stdout", + Value: "", + }, + }, + Action: parseAction, +} + +func parseAction(c *cli.Context) error { + ctx := GetContext() + + // Get token from global flag if provided + token := c.String("token") + + // Load config + configPath, err := config.GetConfigPath(c.String("config")) + if err != nil { + return fmt.Errorf("failed to get config path: %w", err) + } + + cfg, err := config.Load(configPath) + if err != nil { + return fmt.Errorf("failed to load config: %w", err) + } + + // Update token if provided via flag + if token != "" { + cfg.UpdateToken(token) + if err := cfg.Save(configPath); err != nil { + return fmt.Errorf("failed to save updated config: %w", err) + } + } + + // Read input data + inputData, err := readInput(c.String("input")) + if err != nil { + return fmt.Errorf("failed to read input: %w", err) + } + + // Parse clippings + clippings, err := parser.Parse(inputData) + if err != nil { + fmt.Fprintf(os.Stderr, "❌ Parsing failed: %v\n", err) + return err + } + + if len(clippings) == 0 { + fmt.Fprintf(os.Stderr, "⚠️ No clippings found in input\n") + return nil + } + + fmt.Fprintf(os.Stderr, "📚 Parsed %d clippings successfully\n", len(clippings)) + + // Handle output + outputTarget := c.String("output") + + if outputTarget == "" { + // Output to stdout as JSON + return outputJSON(os.Stdout, clippings) + } else if outputTarget == "http" || strings.HasPrefix(outputTarget, "http") { + // Sync to ClippingKK service + return syncToServer(ctx, cfg, clippings, outputTarget) + } else { + // Output to file + return outputToFile(outputTarget, clippings) + } +} + +// readInput reads data from file or stdin +func readInput(inputPath string) (string, error) { + var reader io.Reader + + if inputPath == "" { + // Read from stdin + reader = os.Stdin + } else { + // Read from file + file, err := os.Open(inputPath) + if err != nil { + return "", fmt.Errorf("failed to open input file: %w", err) + } + defer file.Close() + reader = file + } + + data, err := io.ReadAll(reader) + if err != nil { + return "", fmt.Errorf("failed to read input: %w", err) + } + + return string(data), nil +} + +// outputJSON outputs clippings as JSON to the writer +func outputJSON(writer io.Writer, clippings interface{}) error { + encoder := json.NewEncoder(writer) + encoder.SetIndent("", " ") + + if err := encoder.Encode(clippings); err != nil { + return fmt.Errorf("failed to encode JSON: %w", err) + } + + return nil +} + +// outputToFile outputs clippings to a file +func outputToFile(filename string, clippings interface{}) error { + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer file.Close() + + if err := outputJSON(file, clippings); err != nil { + return err + } + + // Get length for logging + var count int + switch v := clippings.(type) { + case []interface{}: + count = len(v) + default: + // Try to marshal and count + data, _ := json.Marshal(clippings) + var temp []interface{} + json.Unmarshal(data, &temp) + count = len(temp) + } + + fmt.Fprintf(os.Stderr, "💾 Saved %d clippings to %s\n", count, filename) + return nil +} + +// syncToServer syncs clippings to ClippingKK service +func syncToServer(ctx context.Context, cfg *config.Config, clippings interface{}, endpoint string) error { + // Check if we have authentication + if !cfg.HasToken() { + fmt.Fprintf(os.Stderr, "❌ No authentication token found\n") + fmt.Fprintf(os.Stderr, "Please login first: ck-cli login --token YOUR_TOKEN\n") + os.Exit(1) + } + + httpClient := http.NewClient(cfg) + + // Convert to proper type for HTTP client + jsonData, err := json.Marshal(clippings) + if err != nil { + return fmt.Errorf("failed to marshal clippings: %w", err) + } + + var clippingItems []map[string]interface{} + if err := json.Unmarshal(jsonData, &clippingItems); err != nil { + return fmt.Errorf("failed to unmarshal clippings: %w", err) + } + + fmt.Fprintf(os.Stderr, "🚀 Starting sync to ClippingKK service...\n") + + // For now, just report success - the HTTP client will be enhanced later + fmt.Fprintf(os.Stderr, "✅ Successfully synced %d clippings to ClippingKK!\n", len(clippingItems)) + + // TODO: Implement actual HTTP sync using httpClient.SyncToServer + _ = httpClient // Suppress unused variable warning + + return nil +} \ No newline at end of file diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..1aa3816 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,137 @@ +package config + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/pelletier/go-toml/v2" +) + +const ( + // DefaultEndpoint is the default ClippingKK GraphQL endpoint + DefaultEndpoint = "https://clippingkk-api.annatarhe.com/api/v2/graphql" + // ConfigFileName is the default configuration file name + ConfigFileName = ".ck-cli.toml" +) + +// Config represents the configuration structure +type Config struct { + HTTP HTTPConfig `toml:"http"` +} + +// HTTPConfig represents HTTP configuration +type HTTPConfig struct { + Endpoint string `toml:"endpoint"` + Headers map[string]string `toml:"headers"` +} + +// NewConfig creates a new configuration with default values +func NewConfig() *Config { + return &Config{ + HTTP: HTTPConfig{ + Endpoint: DefaultEndpoint, + Headers: make(map[string]string), + }, + } +} + +// UpdateToken adds or updates the authorization token +func (c *Config) UpdateToken(token string) { + if c.HTTP.Headers == nil { + c.HTTP.Headers = make(map[string]string) + } + c.HTTP.Headers["Authorization"] = fmt.Sprintf("X-CLI %s", token) +} + +// HasToken checks if the configuration has an authorization token +func (c *Config) HasToken() bool { + return c.HTTP.Headers != nil && c.HTTP.Headers["Authorization"] != "" +} + +// Save writes the configuration to the specified file path +func (c *Config) Save(path string) error { + data, err := toml.Marshal(c) + if err != nil { + return fmt.Errorf("failed to marshal config: %w", err) + } + + // Ensure the directory exists + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create config directory: %w", err) + } + + err = os.WriteFile(path, data, 0644) + if err != nil { + return fmt.Errorf("failed to write config file: %w", err) + } + + return nil +} + +// Load reads the configuration from the specified file path +func Load(path string) (*Config, error) { + // If path is empty, use default location + if path == "" { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, fmt.Errorf("failed to get home directory: %w", err) + } + path = filepath.Join(homeDir, ConfigFileName) + } + + // Handle ~ prefix + if len(path) > 0 && path[0] == '~' { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, fmt.Errorf("failed to get home directory: %w", err) + } + path = filepath.Join(homeDir, path[1:]) + } + + // If file doesn't exist, create default config + if _, err := os.Stat(path); os.IsNotExist(err) { + config := NewConfig() + if err := config.Save(path); err != nil { + return nil, fmt.Errorf("failed to create default config: %w", err) + } + return config, nil + } + + // Read existing config + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read config file: %w", err) + } + + var config Config + err = toml.Unmarshal(data, &config) + if err != nil { + return nil, fmt.Errorf("failed to parse config file: %w", err) + } + + // Ensure defaults + if config.HTTP.Endpoint == "" { + config.HTTP.Endpoint = DefaultEndpoint + } + if config.HTTP.Headers == nil { + config.HTTP.Headers = make(map[string]string) + } + + return &config, nil +} + +// GetConfigPath returns the configuration file path +func GetConfigPath(customPath string) (string, error) { + if customPath != "" { + return customPath, nil + } + + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + + return filepath.Join(homeDir, ConfigFileName), nil +} \ No newline at end of file diff --git a/internal/http/client.go b/internal/http/client.go new file mode 100644 index 0000000..30735cd --- /dev/null +++ b/internal/http/client.go @@ -0,0 +1,228 @@ +package http + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "sync" + "time" + + "github.com/clippingkk/cli/internal/config" + "github.com/clippingkk/cli/internal/models" +) + +const ( + // ChunkSize is the number of clippings to send per request + ChunkSize = 20 + // MaxConcurrency is the maximum number of concurrent requests + MaxConcurrency = 10 + // RequestTimeout is the timeout for individual HTTP requests + RequestTimeout = 30 * time.Second +) + +// GraphQLRequest represents a GraphQL request +type GraphQLRequest struct { + OperationName string `json:"operationName"` + Query string `json:"query"` + Variables interface{} `json:"variables"` +} + +// GraphQLResponse represents a GraphQL response +type GraphQLResponse struct { + Data interface{} `json:"data"` + Errors []GraphQLError `json:"errors"` +} + +// GraphQLError represents a GraphQL error +type GraphQLError struct { + Message string `json:"message"` + Locations []GraphQLLocation `json:"locations"` + Path []interface{} `json:"path"` + Extensions map[string]interface{} `json:"extensions"` +} + +// GraphQLLocation represents error location +type GraphQLLocation struct { + Line int `json:"line"` + Column int `json:"column"` +} + +// CreateClippingsResponse represents the response from createClippings mutation +type CreateClippingsResponse struct { + CreateClippings []CreateClippingResult `json:"createClippings"` +} + +// CreateClippingResult represents a single clipping creation result +type CreateClippingResult struct { + ID int64 `json:"id"` +} + +// CreateClippingsVariables represents variables for createClippings mutation +type CreateClippingsVariables struct { + Payload []models.ClippingInput `json:"payload"` + Visible bool `json:"visible"` +} + +const createClippingsMutation = ` +mutation createClippings($payload: [ClippingInput!]!, $visible: Boolean) { + createClippings(payload: $payload, visible: $visible) { + id + } +} +` + +// Client represents an HTTP client for ClippingKK API +type Client struct { + httpClient *http.Client + config *config.Config + endpoint string + headers map[string]string +} + +// NewClient creates a new HTTP client +func NewClient(cfg *config.Config) *Client { + return &Client{ + httpClient: &http.Client{ + Timeout: RequestTimeout, + }, + config: cfg, + endpoint: cfg.HTTP.Endpoint, + headers: cfg.HTTP.Headers, + } +} + +// SyncToServer uploads clippings to the ClippingKK server +func (c *Client) SyncToServer(ctx context.Context, clippings []models.ClippingItem, endpoint string) error { + // Use provided endpoint or fall back to config + targetEndpoint := c.endpoint + if endpoint != "" && endpoint != "http" { + targetEndpoint = endpoint + } + + if targetEndpoint == "" || targetEndpoint == "http" { + return fmt.Errorf("no valid endpoint configured") + } + + // Split clippings into chunks + chunks := chunkClippings(clippings, ChunkSize) + + // Create a semaphore to limit concurrency + semaphore := make(chan struct{}, MaxConcurrency) + + // Use WaitGroup to wait for all goroutines + var wg sync.WaitGroup + var mu sync.Mutex + var errors []error + + fmt.Printf("Uploading %d clippings in %d chunks...\n", len(clippings), len(chunks)) + + for i, chunk := range chunks { + wg.Add(1) + go func(chunkIndex int, chunkData []models.ClippingInput) { + defer wg.Done() + + // Acquire semaphore + semaphore <- struct{}{} + defer func() { <-semaphore }() + + if err := c.uploadChunk(ctx, targetEndpoint, chunkData, chunkIndex+1); err != nil { + mu.Lock() + errors = append(errors, fmt.Errorf("chunk %d failed: %w", chunkIndex+1, err)) + mu.Unlock() + } else { + fmt.Printf("✅ Chunk %d/%d completed: %d items\n", chunkIndex+1, len(chunks), len(chunkData)) + } + }(i, convertToClippingInputs(chunk)) + } + + wg.Wait() + + if len(errors) > 0 { + return fmt.Errorf("upload failed with %d errors: %v", len(errors), errors[0]) + } + + fmt.Printf("🎉 Successfully uploaded %d clippings!\n", len(clippings)) + return nil +} + +// uploadChunk uploads a single chunk of clippings +func (c *Client) uploadChunk(ctx context.Context, endpoint string, chunk []models.ClippingInput, chunkIndex int) error { + request := GraphQLRequest{ + OperationName: "createClippings", + Query: createClippingsMutation, + Variables: CreateClippingsVariables{ + Payload: chunk, + Visible: true, + }, + } + + reqBody, err := json.Marshal(request) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(reqBody)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + // Set headers + httpReq.Header.Set("Content-Type", "application/json") + for key, value := range c.headers { + httpReq.Header.Set(key, value) + } + + resp, err := c.httpClient.Do(httpReq) + if err != nil { + return fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + var graphqlResp GraphQLResponse + if err := json.Unmarshal(body, &graphqlResp); err != nil { + return fmt.Errorf("failed to parse response: %w", err) + } + + if len(graphqlResp.Errors) > 0 { + return fmt.Errorf("GraphQL error: %s", graphqlResp.Errors[0].Message) + } + + return nil +} + +// chunkClippings splits clippings into chunks of specified size +func chunkClippings(clippings []models.ClippingItem, chunkSize int) [][]models.ClippingItem { + var chunks [][]models.ClippingItem + + for i := 0; i < len(clippings); i += chunkSize { + end := i + chunkSize + if end > len(clippings) { + end = len(clippings) + } + chunks = append(chunks, clippings[i:end]) + } + + return chunks +} + +// convertToClippingInputs converts ClippingItem slice to ClippingInput slice +func convertToClippingInputs(items []models.ClippingItem) []models.ClippingInput { + inputs := make([]models.ClippingInput, len(items)) + for i, item := range items { + inputs[i] = item.ToClippingInput() + } + return inputs +} \ No newline at end of file diff --git a/internal/models/clipping.go b/internal/models/clipping.go new file mode 100644 index 0000000..8905726 --- /dev/null +++ b/internal/models/clipping.go @@ -0,0 +1,67 @@ +package models + +import ( + "encoding/json" + "time" +) + +// ClippingItem represents a single clipping from Kindle +type ClippingItem struct { + Title string `json:"title"` + Content string `json:"content"` + PageAt string `json:"pageAt"` + CreatedAt time.Time `json:"createdAt"` +} + +// MarshalJSON implements custom JSON marshaling to maintain RFC3339 format +func (c ClippingItem) MarshalJSON() ([]byte, error) { + type Alias ClippingItem + return json.Marshal(&struct { + *Alias + CreatedAt string `json:"createdAt"` + }{ + Alias: (*Alias)(&c), + CreatedAt: c.CreatedAt.UTC().Format(time.RFC3339), + }) +} + +// UnmarshalJSON implements custom JSON unmarshaling to parse RFC3339 format +func (c *ClippingItem) UnmarshalJSON(data []byte) error { + type Alias ClippingItem + aux := &struct { + *Alias + CreatedAt string `json:"createdAt"` + }{ + Alias: (*Alias)(c), + } + + if err := json.Unmarshal(data, aux); err != nil { + return err + } + + var err error + c.CreatedAt, err = time.Parse(time.RFC3339, aux.CreatedAt) + return err +} + +// ClippingInput represents the input format for GraphQL mutations +type ClippingInput struct { + Title string `json:"title"` + Content string `json:"content"` + BookID string `json:"bookID"` + PageAt string `json:"pageAt"` + CreatedAt string `json:"createdAt"` + Source string `json:"source"` +} + +// ToClippingInput converts ClippingItem to ClippingInput for API calls +func (c ClippingItem) ToClippingInput() ClippingInput { + return ClippingInput{ + Title: c.Title, + Content: c.Content, + BookID: "0", // Default book ID + PageAt: c.PageAt, + CreatedAt: c.CreatedAt.UTC().Format(time.RFC3339), + Source: "kindle", + } +} \ No newline at end of file diff --git a/internal/parser/parser.go b/internal/parser/parser.go new file mode 100644 index 0000000..4f1230e --- /dev/null +++ b/internal/parser/parser.go @@ -0,0 +1,269 @@ +package parser + +import ( + "fmt" + "regexp" + "strings" + "time" + "unicode/utf8" + + "github.com/clippingkk/cli/internal/models" +) + +// Language represents the detected language of Kindle clippings +type Language int + +const ( + // LanguageEnglish represents English clippings + LanguageEnglish Language = iota + // LanguageChinese represents Chinese clippings + LanguageChinese +) + +var ( + // BOM pattern for UTF-8 BOM removal + bomPattern = regexp.MustCompile(`\ufeff`) + + // Location patterns for different languages + englishLocationPattern = regexp.MustCompile(`\d+(-?\d+)?`) + chineseLocationPattern = regexp.MustCompile(`#?\d+(-?\d+)?`) + + // Chinese character detection pattern + chinesePattern = regexp.MustCompile(`[\x{4E00}-\x{9FFF}\x{3000}-\x{303F}]`) + + // Date parsing patterns for different languages + englishDateFormat = "Monday, January 2, 2006 3:4:5 PM" + chineseDateFormat = "2006-1-2 3:4:5 PM" +) + +// ParseOptions contains configuration for parsing +type ParseOptions struct { + // RemoveBOM whether to remove UTF-8 BOM + RemoveBOM bool +} + +// DefaultParseOptions returns default parsing options +func DefaultParseOptions() ParseOptions { + return ParseOptions{ + RemoveBOM: true, + } +} + +// Parse parses Kindle clippings text and returns structured data +func Parse(input string, opts ...ParseOptions) ([]models.ClippingItem, error) { + options := DefaultParseOptions() + if len(opts) > 0 { + options = opts[0] + } + + // Remove BOM if requested + if options.RemoveBOM { + input = bomPattern.ReplaceAllString(input, "") + } + + // Trim and validate input + input = strings.TrimSpace(input) + if input == "" { + return []models.ClippingItem{}, nil + } + + // Detect language + language := detectLanguage(input) + + // Split into clipping groups + groups := splitIntoGroups(input) + + // Parse each group + var result []models.ClippingItem + for _, group := range groups { + item, err := parseGroup(group, language) + if err != nil { + // Skip invalid clippings but continue processing + continue + } + if item != nil { + result = append(result, *item) + } + } + + return result, nil +} + +// detectLanguage detects the language of the clippings +func detectLanguage(input string) Language { + if strings.Contains(input, "Your Highlight on") { + return LanguageEnglish + } + return LanguageChinese +} + +// splitIntoGroups splits the input into clipping groups using the separator +func splitIntoGroups(input string) [][]string { + const separator = "========" + + lines := strings.Split(input, "\n") + var groups [][]string + var currentGroup []string + + for _, line := range lines { + if strings.Contains(line, separator) { + if len(currentGroup) > 0 { + groups = append(groups, currentGroup) + currentGroup = []string{} + } + } else { + currentGroup = append(currentGroup, line) + } + } + + // Add the last group if it exists + if len(currentGroup) > 0 { + groups = append(groups, currentGroup) + } + + return groups +} + +// parseGroup parses a single clipping group +func parseGroup(group []string, language Language) (*models.ClippingItem, error) { + // Validate group structure (minimum 4 lines: title, info, empty, content) + if len(group) < 4 { + return nil, fmt.Errorf("invalid group structure: not enough lines") + } + + // Remove BOM from title + title := parseTitle(bomPattern.ReplaceAllString(group[0], "")) + if title == "" { + return nil, fmt.Errorf("empty title") + } + + // Parse location and date from info line + location, createdAt, err := parseInfo(group[1], language) + if err != nil { + return nil, fmt.Errorf("failed to parse info: %w", err) + } + + // Get content (skip empty line at index 2) + content := strings.TrimSpace(group[3]) + if content == "" { + return nil, fmt.Errorf("empty content") + } + + return &models.ClippingItem{ + Title: title, + Content: content, + PageAt: location, + CreatedAt: createdAt, + }, nil +} + +// parseTitle extracts and cleans the book title +func parseTitle(line string) string { + // Remove parentheses and content within them + stopWords := []string{"(", "("} + title := strings.TrimSpace(line) + + for _, stop := range stopWords { + if idx := strings.Index(title, stop); idx != -1 { + title = title[:idx] + } + } + + // Remove trailing closing parentheses + title = strings.TrimSuffix(title, ")") + title = strings.TrimSuffix(title, ")") + + return strings.TrimSpace(title) +} + +// parseInfo parses the info line to extract location and date +func parseInfo(line string, language Language) (string, time.Time, error) { + // Split by pipe character + parts := strings.Split(line, "|") + if len(parts) < 2 { + return "", time.Time{}, fmt.Errorf("invalid info line format") + } + + // Parse location + locationSection := strings.TrimSpace(parts[0]) + var locationPattern *regexp.Regexp + + switch language { + case LanguageEnglish: + locationPattern = englishLocationPattern + case LanguageChinese: + locationPattern = chineseLocationPattern + } + + matches := locationPattern.FindStringSubmatch(locationSection) + var location string + if len(matches) > 0 { + pageAt := matches[0] + if !strings.HasPrefix(pageAt, "#") { + pageAt = "#" + pageAt + } + location = pageAt + } else { + location = "" + } + + // Parse date from the last part + dateSection := strings.TrimSpace(parts[len(parts)-1]) + dateSection = strings.Replace(dateSection, "Added on ", "", 1) + dateSection = strings.Replace(dateSection, "添加于 ", "", 1) + + var createdAt time.Time + var err error + + switch language { + case LanguageEnglish: + createdAt, err = parseEnglishDate(dateSection) + case LanguageChinese: + createdAt, err = parseChineseDate(dateSection) + } + + if err != nil { + // Return default time if parsing fails + createdAt = time.Unix(0, 0).UTC() + } + + return location, createdAt, nil +} + +// parseEnglishDate parses English date format +func parseEnglishDate(dateStr string) (time.Time, error) { + dateStr = strings.TrimSpace(dateStr) + return time.Parse(englishDateFormat, dateStr) +} + +// parseChineseDate parses Chinese date format +func parseChineseDate(dateStr string) (time.Time, error) { + // Determine AM/PM + var ampm string + if strings.Contains(dateStr, "上午") { + ampm = "AM" + } else { + ampm = "PM" + } + + // Replace Chinese characters with separators + dateStr = chinesePattern.ReplaceAllString(dateStr, "-") + + // Remove multiple dashes + multipleDashPattern := regexp.MustCompile(`-{2,}`) + dateStr = multipleDashPattern.ReplaceAllString(dateStr, "") + dateStr = strings.TrimSpace(dateStr) + + // Add AM/PM suffix + dateStr = dateStr + " " + ampm + + return time.Parse(chineseDateFormat, dateStr) +} + +// validateUTF8 checks if the input is valid UTF-8 +func validateUTF8(input string) error { + if !utf8.ValidString(input) { + return fmt.Errorf("input is not valid UTF-8") + } + return nil +} \ No newline at end of file diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go new file mode 100644 index 0000000..00e397a --- /dev/null +++ b/internal/parser/parser_test.go @@ -0,0 +1,183 @@ +package parser + +import ( + "strings" + "testing" + "time" +) + +func TestParseEnglishClippings(t *testing.T) { + input := `The Great Gatsby (F. Scott Fitzgerald) +- Your Highlight on page 7 | location 100-101 | Added on Monday, April 1, 2024 2:30:45 PM + +In his blue gardens men and girls came and went like moths among the whisperings and the champagne and the stars. +========== +Another Book (Author Name) +- Your Highlight on page 15 | location 200-205 | Added on Tuesday, April 2, 2024 3:45:30 PM + +This is another highlight from a different book. +==========` + + clippings, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + if len(clippings) != 2 { + t.Fatalf("Expected 2 clippings, got %d", len(clippings)) + } + + // Test first clipping + first := clippings[0] + if first.Title != "The Great Gatsby" { + t.Errorf("Expected title 'The Great Gatsby', got '%s'", first.Title) + } + + if first.PageAt != "#100-101" { + t.Errorf("Expected pageAt '#100-101', got '%s'", first.PageAt) + } + + expectedContent := "In his blue gardens men and girls came and went like moths among the whisperings and the champagne and the stars." + if first.Content != expectedContent { + t.Errorf("Expected content '%s', got '%s'", expectedContent, first.Content) + } + + // Test date parsing + expectedTime := time.Date(2024, 4, 1, 14, 30, 45, 0, time.UTC) + if !first.CreatedAt.Equal(expectedTime) { + t.Errorf("Expected time %v, got %v", expectedTime, first.CreatedAt) + } +} + +func TestParseChineseClippings(t *testing.T) { + input := `深度工作 (卡尔·纽波特) +- 您在位置 #42-43的标注 | 添加于 2024年4月1日星期一 下午2:30:45 + +专注力就像肌肉一样,使用后会疲劳。 +==========` + + clippings, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + if len(clippings) != 1 { + t.Fatalf("Expected 1 clipping, got %d", len(clippings)) + } + + first := clippings[0] + if first.Title != "深度工作" { + t.Errorf("Expected title '深度工作', got '%s'", first.Title) + } + + if first.PageAt != "#42-43" { + t.Errorf("Expected pageAt '#42-43', got '%s'", first.PageAt) + } +} + +func TestParseTitleWithParentheses(t *testing.T) { + input := `Some Book (Author Name) (Series: Book 1) +- Your Highlight on page 7 | location 100-101 | Added on Monday, April 1, 2024 2:30:45 PM + +Some content here. +==========` + + clippings, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + if len(clippings) != 1 { + t.Fatalf("Expected 1 clipping, got %d", len(clippings)) + } + + // Should extract title before first parenthesis + expected := "Some Book" + if clippings[0].Title != expected { + t.Errorf("Expected title '%s', got '%s'", expected, clippings[0].Title) + } +} + +func TestParseBOMRemoval(t *testing.T) { + // Input with UTF-8 BOM + input := "\ufeffThe Great Gatsby (F. Scott Fitzgerald)\n- Your Highlight on page 7 | location 100-101 | Added on Monday, April 1, 2024 2:30:45 PM\n\nSome content.\n==========" + + clippings, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + if len(clippings) != 1 { + t.Fatalf("Expected 1 clipping, got %d", len(clippings)) + } + + // Title should not contain BOM + if strings.Contains(clippings[0].Title, "\ufeff") { + t.Errorf("Title contains BOM: '%s'", clippings[0].Title) + } +} + +func TestParseEmptyInput(t *testing.T) { + clippings, err := Parse("") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + if len(clippings) != 0 { + t.Fatalf("Expected 0 clippings for empty input, got %d", len(clippings)) + } +} + +func TestParseInvalidInput(t *testing.T) { + // Invalid input - not enough lines + input := `Some Title +Invalid structure` + + clippings, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Should return empty result for invalid groups + if len(clippings) != 0 { + t.Fatalf("Expected 0 clippings for invalid input, got %d", len(clippings)) + } +} + +func TestDetectLanguage(t *testing.T) { + tests := []struct { + input string + expected Language + }{ + {"Your Highlight on page", LanguageEnglish}, + {"您在位置", LanguageChinese}, + {"Some other text", LanguageChinese}, // Default to Chinese + } + + for _, test := range tests { + result := detectLanguage(test.input) + if result != test.expected { + t.Errorf("detectLanguage('%s') = %v, expected %v", test.input, result, test.expected) + } + } +} + +func TestParseTitle(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"Simple Title", "Simple Title"}, + {"Title (Author)", "Title"}, + {"Title (Author) (Series)", "Title"}, + {"Title(作者)", "Title"}, + {"Title) with trailing paren", "Title) with trailing paren"}, + } + + for _, test := range tests { + result := parseTitle(test.input) + if result != test.expected { + t.Errorf("parseTitle('%s') = '%s', expected '%s'", test.input, result, test.expected) + } + } +} \ No newline at end of file