text
diff --git a/README.md b/README.md index fb7b12b..760c11b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ### Features include: -- **Response formatting**: automatically formats and colors output for supported types (json, msgpack, xml, etc.) +- **Response formatting**: automatically formats and colors output (json, html, msgpack, xml, etc.) - **Image rendering**: render images directly in your terminal - **Compression**: automatic gzip and zstd response body decompression - **Authentication**: support for Basic Auth, Bearer Token, and AWS Signature V4 @@ -67,6 +67,7 @@ To make a GET request to a URL and print the status code to stderr and the respo ```sh fetch example.com ``` +
HTTP/1.1 200 OK
{
diff --git a/docs/USAGE.md b/docs/USAGE.md
index c79741b..0d009dc 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -332,6 +332,14 @@ fetch --colour on example.com
Set whether output should be formatted. Options: `auto`, `off`, `on`.
+Supported formats for automatic formatting and syntax highlighting:
+- JSON (`application/json`)
+- HTML (`text/html`)
+- XML (`application/xml`, `text/xml`)
+- MessagePack (`application/msgpack`)
+- NDJSON/JSONLines (`application/x-ndjson`)
+- Server-Sent Events (`text/event-stream`)
+
```sh
fetch --format off example.com
fetch --format on example.com
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 98bab0c..e6780cb 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -28,6 +28,7 @@ type ContentType int
const (
TypeUnknown ContentType = iota
+ TypeHTML
TypeImage
TypeJSON
TypeMsgPack
@@ -283,6 +284,10 @@ func formatResponse(ctx context.Context, r *Request, resp *http.Response) (io.Re
}
switch contentType {
+ case TypeHTML:
+ if format.FormatHTML(buf, p) == nil {
+ buf = p.Bytes()
+ }
case TypeImage:
return nil, image.Render(ctx, buf, r.Image == core.ImageNative)
case TypeJSON:
@@ -335,6 +340,8 @@ func getContentType(headers http.Header) ContentType {
}
case "text":
switch subtype {
+ case "html":
+ return TypeHTML
case "event-stream":
return TypeSSE
case "xml":
diff --git a/internal/format/html.go b/internal/format/html.go
new file mode 100644
index 0000000..abd6b2c
--- /dev/null
+++ b/internal/format/html.go
@@ -0,0 +1,377 @@
+package format
+
+import (
+ "bytes"
+ "io"
+ "strings"
+
+ "github.com/ryanfowler/fetch/internal/core"
+
+ "golang.org/x/net/html"
+)
+
+// voidElements is the set of HTML5 void elements. A void element never has a
+// closing tag, so FormatHTML does not keep it on the open-element stack and
+// drops any end tag that names one.
+var voidElements = map[string]bool{
+	"area": true, "base": true, "br": true, "col": true, "embed": true,
+	"hr": true, "img": true, "input": true, "link": true, "meta": true,
+	"param": true, "source": true, "track": true, "wbr": true,
+}
+
+// blockElements is the set of tags that FormatHTML starts on a line of their
+// own. Block-level children of a non-void member are indented one level
+// deeper than the member itself.
+var blockElements = map[string]bool{
+	// Document structure and metadata.
+	"html": true, "head": true, "body": true, "title": true,
+	"meta": true, "link": true, "base": true,
+
+	// Sectioning and generic containers.
+	"div": true, "section": true, "article": true, "nav": true, "aside": true,
+	"header": true, "footer": true, "main": true, "address": true,
+
+	// Headings and blocks of text.
+	"p": true, "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
+	"blockquote": true, "pre": true, "figure": true, "figcaption": true,
+
+	// Lists.
+	"ul": true, "ol": true, "li": true, "dl": true, "dt": true, "dd": true,
+
+	// Tables.
+	"table": true, "thead": true, "tbody": true, "tfoot": true,
+	"tr": true, "td": true, "th": true,
+
+	// Forms.
+	"form": true, "fieldset": true, "select": true, "option": true,
+	"optgroup": true, "datalist": true, "textarea": true,
+
+	// Interactive widgets.
+	"details": true, "summary": true, "dialog": true,
+
+	// Scripting, templates and embedded content.
+	"script": true, "style": true, "noscript": true, "template": true,
+	"canvas": true, "video": true, "audio": true, "iframe": true, "object": true,
+
+	// Void elements that should be on their own line.
+	"hr": true, "br": true, "img": true, "input": true, "area": true,
+	"col": true, "embed": true, "source": true, "track": true, "wbr": true,
+}
+
+// rawTextElements is the set of tags whose body is not HTML markup.
+// FormatHTML writes text directly inside one of them without trimming it.
+var rawTextElements = map[string]bool{"script": true, "style": true}
+
+// preserveWhitespaceElements is the set of tags in which whitespace is
+// significant. FormatHTML writes text directly inside one of them without
+// trimming or re-spacing it.
+var preserveWhitespaceElements = map[string]bool{"pre": true, "textarea": true}
+
+// htmlStackEntry tracks information about an open element.
+// FormatHTML pushes one entry for every non-void start tag it writes and
+// removes it again when the matching end tag is seen.
+type htmlStackEntry struct {
+ // tagName is the lower-cased tag name; end tags are matched against it.
+ tagName string
+ // isBlock records whether the tag is listed in blockElements.
+ isBlock bool
+ // hasBlockChild also becomes true when a comment is written inside the element.
+ hasBlockChild bool // true if a block-level child has been output
+}
+
+// FormatHTML pretty-prints the HTML document in buf to the Printer w.
+//
+// The input is only tokenized, never parsed into a tree, so malformed markup
+// is passed through on a best-effort basis instead of being rejected:
+//
+//   - Tags listed in blockElements start on their own line, indented by their
+//     nesting depth; every other tag stays inline with the surrounding text.
+//   - Void and self-closing tags are written as a plain "<name ...>" and end
+//     tags that name a void element are dropped.
+//   - Text directly inside a rawTextElements or preserveWhitespaceElements
+//     tag is written as the tokenizer returns it. Any other text is trimmed,
+//     keeping a single space on each side that had whitespace, and text that
+//     is whitespace only is discarded.
+//   - Doctypes and comments are written on their own line.
+//
+// It returns nil once the tokenizer reports io.EOF and returns any other
+// tokenizer error unchanged. Output produced before an error stays in w.
+func FormatHTML(buf []byte, w *core.Printer) error {
+	tokenizer := html.NewTokenizer(bytes.NewReader(buf))
+
+	// stack holds the currently open (non-void) elements, innermost last.
+	var stack []htmlStackEntry
+
+	for {
+		tt := tokenizer.Next()
+
+		switch tt {
+		case html.ErrorToken:
+			err := tokenizer.Err()
+			if err == io.EOF {
+				return nil
+			}
+			return err
+
+		case html.DoctypeToken:
+			// Doctype is always at the start, no need for preceding newline.
+			w.WriteString("<!")
+			writeHTMLDoctype(w, tokenizer.Token())
+			w.WriteString(">\n")
+
+		case html.StartTagToken, html.SelfClosingTagToken:
+			tagName, hasAttr := tokenizer.TagName()
+			tagNameStr := string(tagName)
+			tagNameLower := strings.ToLower(tagNameStr)
+
+			isBlock := blockElements[tagNameLower]
+			if isBlock {
+				beginHTMLBlockLine(w, stack)
+			}
+
+			w.WriteString("<")
+			writeHTMLTagName(w, tagNameStr)
+			if hasAttr {
+				writeHTMLAttributes(w, tokenizer)
+			}
+			w.WriteString(">")
+
+			// Only a regular start tag of a non-void element opens a scope
+			// that a later end tag has to close. Void and self-closing tags
+			// are complete as written, so a block-level one ends its line.
+			if tt == html.StartTagToken && !voidElements[tagNameLower] {
+				stack = append(stack, htmlStackEntry{
+					tagName: tagNameLower,
+					isBlock: isBlock,
+				})
+			} else if isBlock {
+				w.WriteString("\n")
+			}
+
+		case html.EndTagToken:
+			tagName, _ := tokenizer.TagName()
+			tagNameStr := string(tagName)
+			tagNameLower := strings.ToLower(tagNameStr)
+
+			// Skip end tags for void elements.
+			if voidElements[tagNameLower] {
+				continue
+			}
+
+			// Find the innermost open element with this name and pop it,
+			// together with anything opened inside it that was never closed.
+			// When nothing matches, entry stays zero-valued and the stray
+			// end tag is still echoed, just without indentation or newline.
+			var entry htmlStackEntry
+			found := false
+			for i := len(stack) - 1; i >= 0; i-- {
+				if stack[i].tagName == tagNameLower {
+					entry = stack[i]
+					stack = stack[:i]
+					found = true
+					break
+				}
+			}
+
+			// A block element whose children were placed on their own lines
+			// gets its end tag on a separate line at the element's depth.
+			if entry.isBlock && entry.hasBlockChild {
+				writeIndent(w, len(stack))
+			}
+
+			w.WriteString("</")
+			writeHTMLTagName(w, tagNameStr)
+			w.WriteString(">")
+
+			if found && entry.isBlock {
+				w.WriteString("\n")
+			}
+
+		case html.TextToken:
+			text := tokenizer.Text()
+
+			// Text directly inside a raw text or whitespace-preserving
+			// element must not be trimmed or re-spaced.
+			verbatim := false
+			if len(stack) > 0 {
+				currentTag := stack[len(stack)-1].tagName
+				verbatim = rawTextElements[currentTag] || preserveWhitespaceElements[currentTag]
+			}
+
+			if verbatim {
+				writeHTMLText(w, text)
+			} else if trimmed := bytes.TrimSpace(text); len(trimmed) > 0 {
+				// Whitespace-only text is source formatting and is dropped.
+				// Otherwise keep one space on each side that had whitespace
+				// so adjacent inline elements do not run together.
+				if isHTMLPadding(text[0]) {
+					w.WriteString(" ")
+				}
+				writeHTMLText(w, trimmed)
+				if isHTMLPadding(text[len(text)-1]) {
+					w.WriteString(" ")
+				}
+			}
+
+		case html.CommentToken:
+			// Comments are treated like block elements.
+			beginHTMLBlockLine(w, stack)
+			w.WriteString("<!--")
+			writeHTMLComment(w, string(tokenizer.Text()))
+			w.WriteString("-->\n")
+		}
+	}
+}
+
+// beginHTMLBlockLine starts an indented line for a block-level child of the
+// innermost open element and records on that element that it now has such a
+// child. It does nothing when no element is open.
+func beginHTMLBlockLine(w *core.Printer, stack []htmlStackEntry) {
+	if len(stack) == 0 {
+		return
+	}
+	// The pointer aliases the caller's slice element, so the flag update is
+	// visible to FormatHTML.
+	parent := &stack[len(stack)-1]
+	if !parent.hasBlockChild {
+		// Only the first block child ends the line that holds the parent's
+		// start tag.
+		w.WriteString("\n")
+	}
+	parent.hasBlockChild = true
+	writeIndent(w, len(stack))
+}
+
+// isHTMLPadding reports whether b is one of the whitespace bytes (space, tab,
+// newline, carriage return) that FormatHTML collapses to a single space at
+// the edge of a text node.
+func isHTMLPadding(b byte) bool {
+	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
+}
+
+// writeHTMLTagName writes the tag name s to p with core.Bold and core.Blue
+// set, then resets the printer. The surrounding angle brackets are written by
+// the caller.
+func writeHTMLTagName(p *core.Printer, s string) {
+ p.Set(core.Bold)
+ p.Set(core.Blue)
+ p.WriteString(s)
+ p.Reset()
+}
+
+// writeHTMLAttrName writes the attribute name s to p with core.Cyan set, then
+// resets the printer.
+func writeHTMLAttrName(p *core.Printer, s string) {
+ p.Set(core.Cyan)
+ p.WriteString(s)
+ p.Reset()
+}
+
+// writeHTMLAttrVal writes the attribute value s to p with core.Green set,
+// passing it through escapeHTMLAttrValue, then resets the printer. The
+// surrounding quotes are written by the caller.
+func writeHTMLAttrVal(p *core.Printer, s string) {
+ p.Set(core.Green)
+ escapeHTMLAttrValue(p, s)
+ p.Reset()
+}
+
+// writeHTMLText writes the text bytes t to p with core.Green set, then resets
+// the printer. The bytes are written as given; no escaping is applied here.
+func writeHTMLText(p *core.Printer, t []byte) {
+ p.Set(core.Green)
+ p.Write(t)
+ p.Reset()
+}
+
+// writeHTMLDoctype writes "DOCTYPE " followed by t.Data to p with core.Cyan
+// set, then resets the printer. It does not write the "<!" and ">" delimiters
+// around the declaration.
+func writeHTMLDoctype(p *core.Printer, t html.Token) {
+ p.Set(core.Cyan)
+ p.WriteString("DOCTYPE ")
+ p.WriteString(t.Data)
+ p.Reset()
+}
+
+// writeHTMLComment writes the comment text s to p with core.Dim set, then
+// resets the printer. It does not write the comment delimiters.
+func writeHTMLComment(p *core.Printer, s string) {
+ p.Set(core.Dim)
+ p.WriteString(s)
+ p.Reset()
+}
+
+// writeHTMLAttributes writes every attribute of the tokenizer's current tag
+// to w, each preceded by a single space. An attribute whose value is empty is
+// written as a bare name; any other attribute is written as name="value",
+// with the value escaped by writeHTMLAttrVal.
+func writeHTMLAttributes(w *core.Printer, tokenizer *html.Tokenizer) {
+	for more := true; more; {
+		var key, val []byte
+		key, val, more = tokenizer.TagAttr()
+		if len(key) == 0 {
+			// Nothing to print for this step; the loop condition decides
+			// whether further attributes remain.
+			continue
+		}
+
+		w.WriteString(" ")
+		writeHTMLAttrName(w, string(key))
+		if len(val) == 0 {
+			continue
+		}
+		w.WriteString("=\"")
+		writeHTMLAttrVal(w, string(val))
+		w.WriteString("\"")
+	}
+}
+
+// escapeHTMLAttrValue writes s to p, replacing the characters that are not
+// safe inside a double-quoted HTML attribute value with character references:
+// '"' becomes &quot;, '&' becomes &amp;, '<' becomes &lt; and '>' becomes
+// &gt;. All other bytes are written unchanged.
+func escapeHTMLAttrValue(p *core.Printer, s string) {
+	// last is the start of the pending run of bytes that need no escaping;
+	// the run is flushed in one write whenever a special byte is reached.
+	var last int
+	for i := 0; i < len(s); i++ {
+		var esc string
+		switch s[i] {
+		case '"':
+			esc = "&quot;"
+		case '&':
+			esc = "&amp;"
+		case '<':
+			esc = "&lt;"
+		case '>':
+			esc = "&gt;"
+		default:
+			continue
+		}
+		p.WriteString(s[last:i])
+		p.WriteString(esc)
+		last = i + 1
+	}
+	p.WriteString(s[last:])
+}
diff --git a/internal/format/html_test.go b/internal/format/html_test.go
new file mode 100644
index 0000000..1863a0b
--- /dev/null
+++ b/internal/format/html_test.go
@@ -0,0 +1,367 @@
+package format
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/ryanfowler/fetch/internal/core"
+)
+
+func TestFormatHTML(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ wantErr bool
+ }{
+ {
+ name: "valid simple html",
+ input: "text",
+ wantErr: false,
+ },
+ {
+ name: "valid nested html",
+ input: "test content",
+ wantErr: false,
+ },
+ {
+ name: "valid html with attributes",
+ input: `text
`,
+ wantErr: false,
+ },
+ {
+ name: "void elements br",
+ input: "line1
line2
",
+ wantErr: false,
+ },
+ {
+ name: "void elements img",
+ input: `
`,
+ wantErr: false,
+ },
+ {
+ name: "void elements input",
+ input: ``,
+ wantErr: false,
+ },
+ {
+ name: "self-closing syntax",
+ input: "
",
+ wantErr: false,
+ },
+ {
+ name: "doctype",
+ input: "",
+ wantErr: false,
+ },
+ {
+ name: "comment",
+ input: "content",
+ wantErr: false,
+ },
+ {
+ name: "script content preservation",
+ input: ``,
+ wantErr: false,
+ },
+ {
+ name: "style content preservation",
+ input: ``,
+ wantErr: false,
+ },
+ {
+ name: "pre whitespace preservation",
+ input: " line1\n line2
",
+ wantErr: false,
+ },
+ {
+ name: "textarea whitespace preservation",
+ input: "",
+ wantErr: false,
+ },
+ {
+ name: "malformed html unclosed tag",
+ input: "unclosed",
+ wantErr: false, // HTML tokenizer handles malformed HTML gracefully
+ },
+ {
+ name: "malformed html mismatched tags",
+ input: "
",
+ wantErr: false, // HTML tokenizer handles malformed HTML gracefully
+ },
+ {
+ name: "boolean attributes",
+ input: ``,
+ wantErr: false,
+ },
+ {
+ name: "multiple attributes",
+ input: `link`,
+ wantErr: false,
+ },
+ {
+ name: "inline elements",
+ input: "Text with bold and italic
",
+ wantErr: false,
+ },
+ {
+ name: "empty input",
+ input: "",
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(tt.input), p)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("FormatHTML() error = %v, wantErr %v", err, tt.wantErr)
+ }
+ })
+ }
+}
+
+func TestFormatHTMLOutput(t *testing.T) {
+ input := "text
"
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ if !strings.Contains(output, "") {
+ t.Errorf("output should contain , got: %s", output)
+ }
+ if !strings.Contains(output, "") {
+ t.Errorf("output should contain , got: %s", output)
+ }
+ if !strings.Contains(output, "text") {
+ t.Errorf("output should contain text, got: %s", output)
+ }
+}
+
+func TestFormatHTMLIndentation(t *testing.T) {
+ input := "Test content
"
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+
+ // Check that block elements start on new lines with proper indentation.
+ lines := strings.Split(output, "\n")
+ foundIndentedDiv := false
+ for _, line := range lines {
+ if strings.Contains(line, "") && strings.HasPrefix(line, " ") {
+ foundIndentedDiv = true
+ break
+ }
+ }
+ if !foundIndentedDiv {
+ t.Errorf("expected indented , got output:\n%s", output)
+ }
+}
+
+func TestFormatHTMLDoctype(t *testing.T) {
+ input := ""
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ if !strings.Contains(output, "") {
+ t.Errorf("output should contain , got: %s", output)
+ }
+}
+
+func TestFormatHTMLComment(t *testing.T) {
+ input := "content"
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ if !strings.Contains(output, "") {
+ t.Errorf("output should contain comment, got: %s", output)
+ }
+}
+
+func TestFormatHTMLVoidElements(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ check string
+ }{
+ {
+ name: "br element",
+ input: "line1
line2
",
+ check: "
",
+ },
+ {
+ name: "hr element",
+ input: "
",
+ check: "
",
+ },
+ {
+ name: "img element",
+ input: `
`,
+ check: `
`,
+ },
+ {
+ name: "input element",
+ input: ``,
+ check: ``,
+ },
+ {
+ name: "meta element",
+ input: ``,
+ check: ``,
+ },
+ {
+ name: "link element",
+ input: ``,
+ check: ``,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(tt.input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ if !strings.Contains(output, tt.check) {
+ t.Errorf("output should contain %q, got: %s", tt.check, output)
+ }
+ // Void elements should not have closing tags.
+ tagName := strings.Split(strings.TrimPrefix(tt.check, "<"), " ")[0]
+ tagName = strings.TrimSuffix(tagName, ">")
+ closingTag := "" + tagName + ">"
+ if strings.Contains(output, closingTag) {
+ t.Errorf("output should not contain closing tag %s for void element, got: %s", closingTag, output)
+ }
+ })
+ }
+}
+
+func TestFormatHTMLPreservesRawText(t *testing.T) {
+ input := ``
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ // The raw content should be preserved.
+ if !strings.Contains(output, `if (x < 5 && y > 3)`) {
+ t.Errorf("script content should be preserved, got: %s", output)
+ }
+}
+
+func TestFormatHTMLPreservesPreWhitespace(t *testing.T) {
+ input := " line1\n line2
"
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ // The whitespace should be preserved.
+ if !strings.Contains(output, " line1") {
+ t.Errorf("pre whitespace should be preserved, got: %s", output)
+ }
+ if !strings.Contains(output, " line2") {
+ t.Errorf("pre whitespace should be preserved, got: %s", output)
+ }
+}
+
+func TestFormatHTMLPlanExample(t *testing.T) {
+ input := `Test Hello
Text with bold
`
+ expected := `
+
+
+ Test
+
+
+
+ Hello
+ Text with bold
+
+
+
+
+
+`
+ p := core.NewHandle(core.ColorOff).Stderr()
+ err := FormatHTML([]byte(input), p)
+ if err != nil {
+ t.Fatalf("FormatHTML() error = %v", err)
+ }
+
+ output := string(p.Bytes())
+ if output != expected {
+ t.Errorf("FormatHTML() output mismatch.\nGot:\n%s\nExpected:\n%s", output, expected)
+ }
+}
+
+// TestEscapeHTMLAttrValue checks that escapeHTMLAttrValue replaces the four
+// special characters (", &, <, >) with character references and leaves all
+// other input untouched.
+func TestEscapeHTMLAttrValue(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "no escape needed",
+			input: "hello world",
+			want:  "hello world",
+		},
+		{
+			name:  "with ampersand",
+			input: "foo & bar",
+			want:  "foo &amp; bar",
+		},
+		{
+			name:  "with less than",
+			input: "a < b",
+			want:  "a &lt; b",
+		},
+		{
+			name:  "with greater than",
+			input: "a > b",
+			want:  "a &gt; b",
+		},
+		{
+			name:  "with quotes",
+			input: `say "hello"`,
+			want:  "say &quot;hello&quot;",
+		},
+		{
+			name:  "mixed special chars",
+			input: `<script>"alert('&')"</script>`,
+			want:  "&lt;script&gt;&quot;alert('&amp;')&quot;&lt;/script&gt;",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			p := core.NewHandle(core.ColorOff).Stderr()
+			escapeHTMLAttrValue(p, tt.input)
+			got := string(p.Bytes())
+			if got != tt.want {
+				t.Errorf("escapeHTMLAttrValue() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}