diff --git a/sdl/README.md b/sdl/README.md
new file mode 100644
index 0000000..7dd385a
--- /dev/null
+++ b/sdl/README.md
@@ -0,0 +1,46 @@
+# sdl
+
+⚠️ This package is experimental and subject to change without warning. ⚠️
+
+This package defines a schema definition language (SDL) for API-fu. The SDL can be used to define the schema of an API, which can then be used to generate code for the API in various languages.
+
+It is inspired by the GraphQL SDL, but is designed to describe a schema in sufficient detail to produce GraphQL server implementations, JSON:API server implementations, and client SDKs.
+
+Here's an example:
+
+```
+resource Person {
+    type: "people"
+
+    attributes {
+        firstName: String!
+        lastName: String!
+    }
+}
+
+interface Pet {
+    attributes {
+        name: String!
+    }
+
+    relationships {
+        owner: Person!
+    }
+}
+
+resource Dog implements Pet {
+    type: "dogs"
+
+    attributes {
+        barkDecibels: Int!
+    }
+}
+
+resource Cat implements Pet {
+    type: "cats"
+
+    attributes {
+        meowDecibels: Int!
+    }
+}
+```
diff --git a/sdl/ast/ast.go b/sdl/ast/ast.go
new file mode 100644
index 0000000..e2be35c
--- /dev/null
+++ b/sdl/ast/ast.go
@@ -0,0 +1,95 @@
+// Package ast defines the abstract syntax tree produced by parsing an SDL
+// document.
+package ast
+
+import "github.com/ccbrown/api-fu/sdl/token"
+
+// Node is implemented by every node in the tree and reports the node's
+// starting position in the source document.
+type Node interface {
+	Position() token.Position
+}
+
+// Document is the root node: an entire parsed SDL document.
+type Document struct {
+	Definitions []Definition
+}
+
+// Position always reports the start of the document.
+func (*Document) Position() token.Position { return token.Position{Line: 1, Column: 1} }
+
+// InterfaceDefinition or ResourceDefinition
+type Definition interface {
+	Node
+}
+
+// Name is an identifier along with the position where it appeared.
+type Name struct {
+	Name         string
+	NamePosition token.Position
+}
+
+func (n *Name) Position() token.Position { return n.NamePosition }
+
+// InterfaceDefinition represents an `interface` definition, which declares
+// attributes and relationships shared by the resources that implement it.
+type InterfaceDefinition struct {
+	Name    *Name
+	Extends []*Name
+
+	Attributes    *Attributes
+	Relationships *Relationships
+}
+
+func (n *InterfaceDefinition) Position() token.Position { return n.Name.Position() }
+
+// ResourceDefinition represents a `resource` definition.
+type ResourceDefinition struct {
+	Name    *Name
+	Extends []*Name
+
+	Type          *StringValue
+	Attributes    *Attributes
+	Relationships *Relationships
+}
+
+func (n *ResourceDefinition) Position() token.Position { return n.Name.Position() }
+
+// Attributes is a brace-delimited `attributes { ... }` block.
+type Attributes struct {
+	Opening token.Position
+	Closing token.Position
+	Fields  []*Field
+}
+
+func (n *Attributes) Position() token.Position { return n.Opening }
+
+// Relationships is a brace-delimited `relationships { ... }` block.
+type Relationships struct {
+	Opening token.Position
+	Closing token.Position
+	Fields  []*Field
+}
+
+func (n *Relationships) Position() token.Position { return n.Opening }
+
+// StringValue is a quoted string literal.
+type StringValue struct {
+	// Value is the actual, unquoted value.
+ Value string + + Literal token.Position +} + +func (n *StringValue) Position() token.Position { return n.Literal } + +type Field struct { + Name *Name + Type Type +} + +func (n *Field) Position() token.Position { return n.Name.Position() } + +// NamedType or RequiredType +type Type interface { + Node +} + +type RequiredType struct { + Type Type +} + +func (n *RequiredType) Position() token.Position { return n.Type.Position() } + +type NamedType struct { + Name *Name +} + +func (n *NamedType) Position() token.Position { return n.Name.Position() } diff --git a/sdl/scanner/scanner.go b/sdl/scanner/scanner.go new file mode 100644 index 0000000..d91b94c --- /dev/null +++ b/sdl/scanner/scanner.go @@ -0,0 +1,195 @@ +package scanner + +import ( + "fmt" + "unicode/utf8" + + "github.com/ccbrown/api-fu/sdl/token" +) + +type Error struct { + Message string + Line int + Column int +} + +func (err *Error) Error() string { + return err.Message +} + +type Scanner struct { + src []byte + mode Mode + offset int + line int + column int + errors []*Error + + nextRune rune + nextRuneSize int + + token token.Token + tokenOffset int + tokenPosition token.Position + tokenLength int + tokenStringValue string +} + +type Mode uint + +const ( + ScanIgnored Mode = 1 << iota +) + +func New(src []byte, mode Mode) *Scanner { + s := &Scanner{ + src: src, + mode: mode, + line: 1, + column: 1, + } + s.readNextRune() + return s +} + +func (s *Scanner) Errors() []*Error { + return s.errors +} + +func (s *Scanner) errorf(message string, args ...interface{}) { + s.errors = append(s.errors, &Error{ + Message: fmt.Sprintf(message, args...), + Line: s.line, + Column: s.column, + }) +} + +func (s *Scanner) readNextRune() { + if s.isDone() { + s.nextRune = -1 + s.nextRuneSize = 0 + } else if r, size := utf8.DecodeRune(s.src[s.offset:]); r == utf8.RuneError && size != 0 { + s.nextRune = r + s.nextRuneSize = 1 + } else { + s.nextRune = r + s.nextRuneSize = size + } +} + +func (s *Scanner) peek() rune { + r, 
_ := utf8.DecodeRune(s.src[s.offset+s.nextRuneSize:])
+	return r
+}
+
+// consumeRune advances past nextRune, updating the line/column counters, and
+// returns the consumed rune.
+func (s *Scanner) consumeRune() rune {
+	r := s.nextRune
+	s.offset += s.nextRuneSize
+	s.readNextRune()
+	// "\r\n" counts as a single line terminator: the '\r' of a "\r\n" pair
+	// does not bump the line; the following '\n' does.
+	if r == '\n' || (r == '\r' && s.nextRune != '\n') {
+		s.line++
+		s.column = 1
+	} else {
+		s.column++
+	}
+	return r
+}
+
+// consumeName consumes a name ([_A-Za-z][_0-9A-Za-z]*) if one begins at the
+// current position, reporting whether it did so.
+func (s *Scanner) consumeName() bool {
+	if r := s.nextRune; r == '_' || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
+		s.consumeRune()
+		for !s.isDone() {
+			if r := s.nextRune; r == '_' || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') {
+				s.consumeRune()
+			} else {
+				break
+			}
+		}
+		return true
+	}
+	return false
+}
+
+// isSourceCharacter reports whether r is legal in a source document:
+// tab, newline, carriage return, or anything in [U+0020, U+FFFF].
+func isSourceCharacter(r rune) bool {
+	return r == '\t' || r == '\n' || r == '\r' || (r >= 0x20 && r <= 0xffff)
+}
+
+// isDone reports whether the entire source has been consumed.
+func (s *Scanner) isDone() bool {
+	return len(s.src) == s.offset
+}
+
+// Scan advances to the next token, reporting false at end of input. Unless
+// the ScanIgnored mode flag is set, ignored tokens (whitespace, line
+// terminators, the BOM) are skipped. Invalid input is recorded via errorf and
+// skipped; Scan never returns an INVALID token.
+func (s *Scanner) Scan() bool {
+	for {
+		s.token = token.INVALID
+		s.tokenOffset = s.offset
+		s.tokenPosition = token.Position{
+			Line:   s.line,
+			Column: s.column,
+		}
+
+		if s.isDone() {
+			return false
+		}
+
+		switch s.nextRune {
+		case '\t', ' ':
+			s.consumeRune()
+			s.token = token.WHITE_SPACE
+		case ':', '=', '{', '}', '!':
+			s.consumeRune()
+			s.token = token.PUNCTUATOR
+		case '\r', '\n':
+			// A "\r\n" pair is a single LINE_TERMINATOR token.
+			if s.consumeRune() == '\r' && s.nextRune == '\n' {
+				s.consumeRune()
+			}
+			s.token = token.LINE_TERMINATOR
+		case '"':
+			// consumeStringValue (string_value.go) consumes through the
+			// closing quote and returns the decoded, unquoted value.
+			s.tokenStringValue = s.consumeStringValue()
+			s.token = token.STRING_VALUE
+		case utf8.RuneError:
+			// NOTE(review): nextRune is RuneError both for an invalid UTF-8
+			// encoding and for a literal encoded U+FFFD; both are rejected
+			// here, though U+FFFD falls in the source character range —
+			// confirm whether that is intended.
+			s.errorf("invalid utf-8 character")
+			s.consumeRune()
+		case 0xfeff:
+			// The byte order mark is only legal as the very first character.
+			if s.offset == 0 {
+				s.token = token.UNICODE_BOM
+			} else {
+				s.errorf("illegal byte order mark")
+			}
+			s.consumeRune()
+		default:
+			if s.consumeName() {
+				s.token = token.NAME
+			} else {
+				s.errorf("illegal character: %#U", s.nextRune)
+				s.consumeRune()
+			}
+		}
+
+		// Loop again on invalid input or (by default) ignored tokens.
+		if s.token == token.INVALID || (s.token.IsIgnored() && (s.mode&ScanIgnored) == 0) {
+			continue
+		}
+
+		s.tokenLength = s.offset - s.tokenOffset
+		return true
+	}
+}
+
+// Token returns the type of the token found by the last call to Scan.
+func (s *Scanner) Token() 
token.Token {
+	return s.token
+}
+
+// Position returns the 1-based line/column where the current token begins.
+func (s *Scanner) Position() token.Position {
+	return s.tokenPosition
+}
+
+// Literal returns the raw source text of the current token.
+func (s *Scanner) Literal() string {
+	return string(s.src[s.tokenOffset : s.tokenOffset+s.tokenLength])
+}
+
+// StringValue returns the decoded (unquoted, unescaped) value of a
+// STRING_VALUE token. For any other token it returns the raw literal.
+func (s *Scanner) StringValue() string {
+	if s.token == token.STRING_VALUE {
+		return s.tokenStringValue
+	} else {
+		return s.Literal()
+	}
+}
diff --git a/sdl/scanner/scanner_test.go b/sdl/scanner/scanner_test.go
new file mode 100644
index 0000000..d68179e
--- /dev/null
+++ b/sdl/scanner/scanner_test.go
@@ -0,0 +1,143 @@
+package scanner
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/ccbrown/api-fu/sdl/token"
+)
+
+// TestScanner walks a small document containing all three line terminator
+// forms (\n, \r\n, \r) and checks every token's type, literal, and position.
+func TestScanner(t *testing.T) {
+	s := New([]byte(`{`+"\n"+`foo {`+"\r\n"+` bar: "baz"}`+"\r"+`}`), ScanIgnored)
+	for _, expected := range []struct {
+		Token   token.Token
+		Literal string
+		Line    int
+		Column  int
+	}{
+		{token.PUNCTUATOR, "{", 1, 1},
+		{token.LINE_TERMINATOR, "\n", 1, 2},
+		{token.NAME, "foo", 2, 1},
+		{token.WHITE_SPACE, " ", 2, 4},
+		{token.PUNCTUATOR, "{", 2, 5},
+		{token.LINE_TERMINATOR, "\r\n", 2, 6},
+		{token.WHITE_SPACE, " ", 3, 1},
+		{token.NAME, "bar", 3, 2},
+		{token.PUNCTUATOR, ":", 3, 5},
+		{token.WHITE_SPACE, " ", 3, 6},
+		{token.STRING_VALUE, `"baz"`, 3, 7},
+		{token.PUNCTUATOR, "}", 3, 12},
+		{token.LINE_TERMINATOR, "\r", 3, 13},
+		{token.PUNCTUATOR, "}", 4, 1},
+	} {
+		require.True(t, s.Scan())
+		assert.Equal(t, expected.Token, s.Token())
+		assert.Equal(t, expected.Literal, s.Literal())
+		assert.Equal(t, expected.Line, s.Position().Line)
+		assert.Equal(t, expected.Column, s.Position().Column)
+	}
+	assert.False(t, s.Scan())
+	assert.Empty(t, s.Errors())
+}
+
+// TestScanner_IllegalCharacter checks that a character outside the name /
+// punctuator grammar is reported as an error (with position) and skipped.
+func TestScanner_IllegalCharacter(t *testing.T) {
+	s := New([]byte(`{😃}`), 0)
+	var tokens []token.Token
+	var literals []string
+	for s.Scan() {
+		tokens = append(tokens, s.Token())
+		literals = append(literals, s.Literal())
+	}
+	assert.Equal(t, 
[]token.Token{token.PUNCTUATOR, token.PUNCTUATOR}, tokens)
+	assert.Equal(t, []string{"{", "}"}, literals)
+	require.Len(t, s.Errors(), 1)
+	err := s.Errors()[0]
+	assert.Equal(t, 1, err.Line)
+	assert.Equal(t, 2, err.Column)
+}
+
+// TestScanner_IllegalUTF8Character feeds a lone UTF-8 continuation lead byte
+// and expects exactly one error at the byte's position.
+func TestScanner_IllegalUTF8Character(t *testing.T) {
+	s := New([]byte("\xc3"), 0)
+	s.Scan()
+	require.Len(t, s.Errors(), 1)
+	assert.Equal(t, 1, s.Errors()[0].Column)
+}
+
+// TestScanner_Strings covers string decoding: simple values, every escape
+// sequence, \u escapes, and the error cases (bad escapes, unterminated
+// strings, characters outside the source character range).
+func TestScanner_Strings(t *testing.T) {
+	for src, value := range map[string]string{
+		`"simple"`:                           `simple`,
+		`" white space "`:                    ` white space `,
+		`"quote \""`:                         `quote "`,
+		`"escaped \n\r\b\t\f"`:               "escaped \n\r\b\t\f",
+		`"slashes \\ \/"`:                    `slashes \ /`,
+		`"unicode \u1234\u5678\u90AB\uCDef"`: "unicode \u1234\u5678\u90AB\uCDEF",
+	} {
+		s := New([]byte(src), ScanIgnored)
+		assert.True(t, s.Scan())
+		assert.Equal(t, src, s.Literal())
+		assert.Equal(t, value, s.StringValue())
+		assert.False(t, s.Scan())
+		assert.Empty(t, s.Errors())
+	}
+
+	for name, tc := range map[string]struct {
+		Source              string
+		ExpectedLiteral     string
+		ExpectedErrorColumn int
+	}{
+		"BadEscapeSequence":        {`"\x"`, `"\x"`, 3},
+		"BadUnicodeEscapeSequence": {`"\ufooo"`, `"\ufooo"`, 5},
+		"Unterminated":             {`"foo` + "\n" + `"`, `"foo`, 5},
+		"IllegalCharacter":         {`"👾"`, `"👾"`, 2},
+	} {
+		t.Run(name, func(t *testing.T) {
+			s := New([]byte(tc.Source), 0)
+			assert.True(t, s.Scan())
+			assert.Equal(t, tc.ExpectedLiteral, s.Literal())
+			require.NotEmpty(t, s.Errors())
+			assert.NotEmpty(t, s.Errors()[0].Error())
+			assert.Equal(t, 1, s.Errors()[0].Line)
+			assert.Equal(t, tc.ExpectedErrorColumn, s.Errors()[0].Column)
+		})
+	}
+}
+
+// TestScanner_BOM checks that a byte order mark is a token at offset 0 and an
+// error anywhere else.
+func TestScanner_BOM(t *testing.T) {
+	s := New([]byte("\ufefffoo"), ScanIgnored)
+	var tokens []token.Token
+	for s.Scan() {
+		tokens = append(tokens, s.Token())
+	}
+	assert.Equal(t, []token.Token{token.UNICODE_BOM, token.NAME}, tokens)
+	assert.Empty(t, s.Errors())
+
+	t.Run("IllegalPosition", func(t *testing.T) {
+		s := New([]byte("foo\ufeff"), ScanIgnored)
+		assert.True(t, 
s.Scan())
+		assert.False(t, s.Scan())
+		require.Len(t, s.Errors(), 1)
+		assert.Equal(t, 4, s.Errors()[0].Column)
+	})
+}
+
+// TestScanner_SkipsIgnored checks that, without the ScanIgnored flag,
+// whitespace and line terminators never surface as tokens.
+func TestScanner_SkipsIgnored(t *testing.T) {
+	s := New([]byte("{\n foo {\n bar\n } \n}"), 0)
+	var tokens []token.Token
+	var literals []string
+	for s.Scan() {
+		tokens = append(tokens, s.Token())
+		literals = append(literals, s.Literal())
+	}
+	assert.Equal(t, []token.Token{
+		token.PUNCTUATOR,
+		token.NAME,
+		token.PUNCTUATOR,
+		token.NAME,
+		token.PUNCTUATOR,
+		token.PUNCTUATOR,
+	}, tokens)
+	assert.Equal(t, []string{"{", "foo", "{", "bar", "}", "}"}, literals)
+	assert.Empty(t, s.Errors())
+}
diff --git a/sdl/scanner/string_value.go b/sdl/scanner/string_value.go
new file mode 100644
index 0000000..f93d503
--- /dev/null
+++ b/sdl/scanner/string_value.go
@@ -0,0 +1,84 @@
+package scanner
+
+// hexRuneValue returns the value of a hex digit rune, or -1 if r is not a
+// hex digit.
+func hexRuneValue(r rune) rune {
+	if r >= '0' && r <= '9' {
+		return r - '0'
+	} else if r >= 'a' && r <= 'f' {
+		return 10 + r - 'a'
+	} else if r >= 'A' && r <= 'F' {
+		return 10 + r - 'A'
+	}
+	return -1
+}
+
+// consumeStringValue consumes a string literal starting at the opening quote
+// and returns its decoded (unescaped) value. Errors — bad escapes, illegal
+// characters, a line terminator or EOF before the closing quote — are
+// recorded via errorf; decoding continues on a best-effort basis.
+func (s *Scanner) consumeStringValue() string {
+	s.consumeRune() // '"'
+
+	value := ""
+
+	terminated := false
+	isEscaped := false
+	for !terminated && !s.isDone() {
+		if isEscaped {
+			// The previous rune was a backslash; decode one escape sequence.
+			consumed := false
+			switch s.nextRune {
+			case '"', '\\', '/':
+				value += string(s.nextRune)
+			case 'b':
+				value += string('\b')
+			case 'f':
+				value += string('\f')
+			case 'n':
+				value += string('\n')
+			case 'r':
+				value += string('\r')
+			case 't':
+				value += string('\t')
+			case 'u':
+				// \uXXXX: exactly four hex digits.
+				s.consumeRune()
+				consumed = true
+
+				var code rune
+				for i := 0; i < 4; i++ {
+					if v := hexRuneValue(s.nextRune); v < 0 {
+						s.errorf("illegal unicode escape sequence")
+						break
+					} else {
+						code = (code << 4) | v
+						s.consumeRune()
+					}
+				}
+				value += string(code)
+			default:
+				s.errorf("illegal escape sequence")
+			}
+			if !consumed {
+				s.consumeRune()
+			}
+			isEscaped = false
+			continue
+		}
+
+		// Strings may not span lines: an unescaped line terminator ends the
+		// scan and is reported as "unterminated string" below.
+		if s.nextRune == '\n' || s.nextRune == '\r' {
+			break
+		} else if s.nextRune == '\\' { 
+			s.consumeRune()
+			isEscaped = true
+		} else if s.nextRune == '"' {
+			s.consumeRune()
+			terminated = true
+		} else if !isSourceCharacter(s.nextRune) {
+			s.errorf("illegal character %#U in string", s.nextRune)
+			s.consumeRune()
+		} else {
+			value += string(s.nextRune)
+			s.consumeRune()
+		}
+	}
+
+	if !terminated {
+		s.errorf("unterminated string")
+	}
+
+	return value
+}
diff --git a/sdl/token/token.go b/sdl/token/token.go
new file mode 100644
index 0000000..ee42759
--- /dev/null
+++ b/sdl/token/token.go
@@ -0,0 +1,29 @@
+// Package token defines the token types and source positions shared by the
+// SDL scanner and parser.
+package token
+
+// Token identifies a lexical token type.
+type Token int
+
+const (
+	INVALID Token = iota
+
+	PUNCTUATOR
+	NAME
+	STRING_VALUE
+
+	// The remaining token types are "ignored": they carry no meaning and are
+	// skipped by default (see Token.IsIgnored).
+	UNICODE_BOM
+	WHITE_SPACE
+	LINE_TERMINATOR
+)
+
+// IsIgnored reports whether t is an ignored token type (byte order mark,
+// whitespace, or line terminator).
+func (t Token) IsIgnored() bool {
+	switch t {
+	case UNICODE_BOM, WHITE_SPACE, LINE_TERMINATOR:
+		return true
+	default:
+		return false
+	}
+}
+
+// Position is a 1-based line/column location in a source document.
+type Position struct {
+	Line   int
+	Column int
+}