diff --git a/README.md b/README.md
index fb7b12b..760c11b 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 
 ### Features include:
 
-- **Response formatting**: automatically formats and colors output for supported types (json, msgpack, xml, etc.)
+- **Response formatting**: automatically formats and colors output (json, html, msgpack, xml, etc.)
 - **Image rendering**: render images directly in your terminal
 - **Compression**: automatic gzip and zstd response body decompression
 - **Authentication**: support for Basic Auth, Bearer Token, and AWS Signature V4
@@ -67,6 +67,7 @@ To make a GET request to a URL and print the status code to stderr and the respo
 ```sh
 fetch example.com
 ```
+
HTTP/1.1 200 OK
 
 {
diff --git a/docs/USAGE.md b/docs/USAGE.md
index c79741b..0d009dc 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -332,6 +332,14 @@ fetch --colour on example.com
 
 Set whether output should be formatted. Options: `auto`, `off`, `on`.
 
+Supported formats for automatic formatting and syntax highlighting:
+- JSON (`application/json`)
+- HTML (`text/html`)
+- XML (`application/xml`, `text/xml`)
+- MessagePack (`application/msgpack`)
+- NDJSON/JSONLines (`application/x-ndjson`)
+- Server-Sent Events (`text/event-stream`)
+
 ```sh
 fetch --format off example.com
 fetch --format on example.com
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 98bab0c..e6780cb 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -28,6 +28,7 @@ type ContentType int
 
 const (
 	TypeUnknown ContentType = iota
+	TypeHTML
 	TypeImage
 	TypeJSON
 	TypeMsgPack
@@ -283,6 +284,10 @@ func formatResponse(ctx context.Context, r *Request, resp *http.Response) (io.Re
 	}
 
 	switch contentType {
+	case TypeHTML:
+		if format.FormatHTML(buf, p) == nil {
+			buf = p.Bytes()
+		}
 	case TypeImage:
 		return nil, image.Render(ctx, buf, r.Image == core.ImageNative)
 	case TypeJSON:
@@ -335,6 +340,8 @@ func getContentType(headers http.Header) ContentType {
 			}
 		case "text":
 			switch subtype {
+			case "html":
+				return TypeHTML
 			case "event-stream":
 				return TypeSSE
 			case "xml":
diff --git a/internal/format/html.go b/internal/format/html.go
new file mode 100644
index 0000000..abd6b2c
--- /dev/null
+++ b/internal/format/html.go
@@ -0,0 +1,377 @@
+package format
+
+import (
+	"bytes"
+	"io"
+	"strings"
+
+	"github.com/ryanfowler/fetch/internal/core"
+
+	"golang.org/x/net/html"
+)
+
+// voidElements is the set of HTML5 void elements: tags that never carry
+// content and so never have a closing tag. FormatHTML pushes no stack entry
+// for these tags and discards any end tag it encounters for them.
+var voidElements = map[string]bool{
+	"area": true, "base": true, "br": true, "col": true,
+	"embed": true, "hr": true, "img": true, "input": true,
+	"link": true, "meta": true, "param": true, "source": true,
+	"track": true, "wbr": true,
+}
+
+// blockElements is the set of tags FormatHTML treats as block-level. When
+// such a tag appears inside another element it is written on its own
+// indented line, and its parent is marked as having a block child.
+var blockElements = map[string]bool{
+	// Document structure and metadata.
+	"html": true, "head": true, "body": true, "title": true,
+	"meta": true, "link": true, "base": true,
+	"script": true, "style": true, "noscript": true, "template": true,
+
+	// Sectioning and generic containers.
+	"div": true, "section": true, "article": true, "nav": true,
+	"aside": true, "header": true, "footer": true, "main": true,
+	"address": true, "details": true, "summary": true, "dialog": true,
+
+	// Headings and text blocks.
+	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
+	"p": true, "pre": true, "blockquote": true,
+	"figure": true, "figcaption": true,
+
+	// Lists.
+	"ul": true, "ol": true, "li": true, "dl": true, "dt": true, "dd": true,
+
+	// Tables.
+	"table": true, "thead": true, "tbody": true, "tfoot": true,
+	"tr": true, "td": true, "th": true,
+
+	// Forms.
+	"form": true, "fieldset": true, "select": true, "option": true,
+	"optgroup": true, "datalist": true, "textarea": true,
+
+	// Embedded content.
+	"canvas": true, "video": true, "audio": true, "iframe": true, "object": true,
+
+	// Void elements that should be on their own line.
+	"hr": true, "br": true, "img": true, "input": true, "area": true,
+	"col": true, "embed": true, "source": true, "track": true, "wbr": true,
+}
+
+// rawTextElements is the set of tags whose text content FormatHTML writes
+// out verbatim instead of trimming and normalizing it.
+var rawTextElements = map[string]bool{"script": true, "style": true}
+
+// preserveWhitespaceElements is the set of tags whose text content must keep
+// its original whitespace; FormatHTML writes that text through unchanged.
+var preserveWhitespaceElements = map[string]bool{"pre": true, "textarea": true}
+
+// htmlStackEntry tracks information about an open element. FormatHTML keeps
+// a stack of these, pushing one per non-void start tag and popping on the
+// matching end tag.
+type htmlStackEntry struct {
+	// tagName is the lower-cased tag name, used to match end tags.
+	tagName       string
+	// isBlock records whether the tag is listed in blockElements.
+	isBlock       bool
+	// hasBlockChild decides whether the closing tag is indented onto its own
+	// line; it is set when a block-level child or a comment is written.
+	hasBlockChild bool // true if a block-level child has been output
+}
+
+// FormatHTML formats the provided HTML to the Printer.
+//
+// buf is tokenized with golang.org/x/net/html and re-emitted token by token:
+// block-level elements and comments are placed on their own indented lines,
+// inline elements stay on the current line, and text inside script, style,
+// pre and textarea is written through untouched. Tag names, attributes, text,
+// the doctype and comments are colored through w.
+//
+// It returns nil once the tokenizer reaches the end of input, or the
+// tokenizer's error otherwise. Output written to w before an error is not
+// rolled back.
+func FormatHTML(buf []byte, w *core.Printer) error {
+	tokenizer := html.NewTokenizer(bytes.NewReader(buf))
+
+	// stack holds the currently open (non-void) elements, innermost last.
+	var stack []htmlStackEntry
+
+	for {
+		tt := tokenizer.Next()
+
+		switch tt {
+		case html.ErrorToken:
+			err := tokenizer.Err()
+			if err == io.EOF {
+				return nil
+			}
+			return err
+
+		case html.DoctypeToken:
+			// Doctype is always at the start, no need for preceding newline.
+			w.WriteString("<!")
+			writeHTMLDoctype(w, tokenizer.Token())
+			w.WriteString(">\n")
+
+		case html.StartTagToken:
+			tagName, hasAttr := tokenizer.TagName()
+			tagNameStr := string(tagName)
+			tagNameLower := strings.ToLower(tagNameStr)
+
+			isBlock := blockElements[tagNameLower]
+			isVoid := voidElements[tagNameLower]
+
+			if isBlock {
+				markHTMLBlockChild(w, stack)
+			}
+
+			w.WriteString("<")
+			writeHTMLTagName(w, tagNameStr)
+			if hasAttr {
+				writeHTMLAttributes(w, tokenizer)
+			}
+			w.WriteString(">")
+
+			if !isVoid {
+				stack = append(stack, htmlStackEntry{
+					tagName: tagNameLower,
+					isBlock: isBlock,
+				})
+			} else if isBlock {
+				// Void block elements have no end tag to finish the line.
+				w.WriteString("\n")
+			}
+
+		case html.EndTagToken:
+			tagName, _ := tokenizer.TagName()
+			tagNameStr := string(tagName)
+			tagNameLower := strings.ToLower(tagNameStr)
+
+			// Skip end tags for void elements.
+			if voidElements[tagNameLower] {
+				continue
+			}
+
+			// Find and pop the matching tag from the stack. Entries opened
+			// above it are dropped as well, which tolerates unclosed children.
+			var entry htmlStackEntry
+			found := false
+			for i := len(stack) - 1; i >= 0; i-- {
+				if stack[i].tagName == tagNameLower {
+					entry = stack[i]
+					stack = stack[:i]
+					found = true
+					break
+				}
+			}
+
+			// A block element whose children were put on their own lines gets
+			// its closing tag on its own indented line too.
+			if found && entry.isBlock && entry.hasBlockChild {
+				writeIndent(w, len(stack))
+			}
+
+			// The closing tag is written even when no matching start tag was
+			// found, so stray end tags in the input are not silently lost.
+			w.WriteString("</")
+			writeHTMLTagName(w, tagNameStr)
+			w.WriteString(">")
+
+			if found && entry.isBlock {
+				w.WriteString("\n")
+			}
+
+		case html.SelfClosingTagToken:
+			tagName, hasAttr := tokenizer.TagName()
+			tagNameStr := string(tagName)
+			tagNameLower := strings.ToLower(tagNameStr)
+
+			isBlock := blockElements[tagNameLower]
+
+			if isBlock {
+				markHTMLBlockChild(w, stack)
+			}
+
+			w.WriteString("<")
+			writeHTMLTagName(w, tagNameStr)
+			if hasAttr {
+				writeHTMLAttributes(w, tokenizer)
+			}
+			w.WriteString(">")
+
+			if isBlock {
+				w.WriteString("\n")
+			}
+
+		case html.TextToken:
+			// NOTE(review): per the x/net/html docs Text returns unescaped
+			// text, so entities in the source are emitted decoded — confirm
+			// this is the intended display behavior.
+			text := tokenizer.Text()
+
+			// Check if we're inside a raw text or whitespace-preserving element.
+			inRawText := false
+			inPreserveWS := false
+			if len(stack) > 0 {
+				currentTag := stack[len(stack)-1].tagName
+				inRawText = rawTextElements[currentTag]
+				inPreserveWS = preserveWhitespaceElements[currentTag]
+			}
+
+			if inRawText || inPreserveWS {
+				// Preserve content exactly.
+				w.Set(core.Green)
+				w.Write(text)
+				w.Reset()
+				continue
+			}
+
+			// Skip text that is only whitespace (formatting whitespace in
+			// source). Otherwise trim it, but keep a single space on either
+			// side where the source had one so inline elements stay separated.
+			trimmed := bytes.TrimSpace(text)
+			if len(trimmed) == 0 {
+				continue
+			}
+			if isHTMLSpace(text[0]) {
+				w.WriteString(" ")
+			}
+			writeHTMLText(w, trimmed)
+			if isHTMLSpace(text[len(text)-1]) {
+				w.WriteString(" ")
+			}
+
+		case html.CommentToken:
+			// Comments are treated like block elements.
+			markHTMLBlockChild(w, stack)
+			w.WriteString("<!--")
+			writeHTMLComment(w, string(tokenizer.Text()))
+			w.WriteString("-->\n")
+		}
+	}
+}
+
+// markHTMLBlockChild prepares the output for a block-level child of the
+// innermost open element: it ends the parent's line the first time a block
+// child appears, records that on the parent, and writes the child's
+// indentation. It does nothing when no element is open.
+func markHTMLBlockChild(w *core.Printer, stack []htmlStackEntry) {
+	if len(stack) == 0 {
+		return
+	}
+	parent := &stack[len(stack)-1]
+	if !parent.hasBlockChild {
+		w.WriteString("\n")
+	}
+	parent.hasBlockChild = true
+	writeIndent(w, len(stack))
+}
+
+// isHTMLSpace reports whether b is a space, tab, line feed or carriage return.
+func isHTMLSpace(b byte) bool {
+	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
+}
+
+// writeHTMLTagName prints the tag name s in bold blue and then resets the
+// Printer's styling.
+func writeHTMLTagName(p *core.Printer, s string) {
+	defer p.Reset()
+	p.Set(core.Bold)
+	p.Set(core.Blue)
+	p.WriteString(s)
+}
+
+// writeHTMLAttrName prints the attribute name s in cyan and then resets the
+// Printer's styling.
+func writeHTMLAttrName(p *core.Printer, s string) {
+	defer p.Reset()
+	p.Set(core.Cyan)
+	p.WriteString(s)
+}
+
+// writeHTMLAttrVal prints the attribute value s in green, passing it through
+// escapeHTMLAttrValue, and then resets the Printer's styling. The surrounding
+// quotes are the caller's responsibility.
+func writeHTMLAttrVal(p *core.Printer, s string) {
+	defer p.Reset()
+	p.Set(core.Green)
+	escapeHTMLAttrValue(p, s)
+}
+
+// writeHTMLText prints the text bytes t in green, exactly as given, and then
+// resets the Printer's styling.
+func writeHTMLText(p *core.Printer, t []byte) {
+	defer p.Reset()
+	p.Set(core.Green)
+	p.Write(t)
+}
+
+// writeHTMLDoctype prints "DOCTYPE " followed by the token's Data in cyan and
+// then resets the Printer's styling. It does not write the enclosing
+// delimiters.
+func writeHTMLDoctype(p *core.Printer, t html.Token) {
+	defer p.Reset()
+	p.Set(core.Cyan)
+	p.WriteString("DOCTYPE ")
+	p.WriteString(t.Data)
+}
+
+// writeHTMLComment prints the comment text s dimmed and then resets the
+// Printer's styling. It does not write the comment delimiters.
+func writeHTMLComment(p *core.Printer, s string) {
+	defer p.Reset()
+	p.Set(core.Dim)
+	p.WriteString(s)
+}
+
+// writeHTMLAttributes prints every attribute of the tokenizer's current tag,
+// each preceded by a single space. An attribute with a non-empty value is
+// written as name="value" with the value escaped; one with an empty value is
+// written as the bare name.
+func writeHTMLAttributes(w *core.Printer, tokenizer *html.Tokenizer) {
+	for more := true; more; {
+		var name, value []byte
+		name, value, more = tokenizer.TagAttr()
+		if len(name) == 0 {
+			// Nothing to print for this step; the loop ends once more is false.
+			continue
+		}
+
+		w.WriteString(" ")
+		writeHTMLAttrName(w, string(name))
+		if len(value) == 0 {
+			continue
+		}
+		w.WriteString("=\"")
+		writeHTMLAttrVal(w, string(value))
+		w.WriteString("\"")
+	}
+}
+
+// escapeHTMLAttrValue writes s to p with the characters that are unsafe
+// inside a double-quoted HTML attribute value replaced by entities:
+// `"` becomes &quot;, `&` becomes &amp;, `<` becomes &lt; and `>` becomes
+// &gt;. All other bytes, including single quotes, are written unchanged.
+func escapeHTMLAttrValue(p *core.Printer, s string) {
+	// last is the start of the pending run of bytes that need no escaping;
+	// runs are flushed in one write each.
+	var last int
+	for i := 0; i < len(s); i++ {
+		var esc string
+		switch s[i] {
+		case '"':
+			esc = "&quot;"
+		case '&':
+			esc = "&amp;"
+		case '<':
+			esc = "&lt;"
+		case '>':
+			esc = "&gt;"
+		default:
+			continue
+		}
+		p.WriteString(s[last:i])
+		p.WriteString(esc)
+		last = i + 1
+	}
+	p.WriteString(s[last:])
+}
diff --git a/internal/format/html_test.go b/internal/format/html_test.go
new file mode 100644
index 0000000..1863a0b
--- /dev/null
+++ b/internal/format/html_test.go
@@ -0,0 +1,367 @@
+package format
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/ryanfowler/fetch/internal/core"
+)
+
+func TestFormatHTML(t *testing.T) {
+	tests := []struct {
+		name    string
+		input   string
+		wantErr bool
+	}{
+		{
+			name:    "valid simple html",
+			input:   "text",
+			wantErr: false,
+		},
+		{
+			name:    "valid nested html",
+			input:   "test
content
", + wantErr: false, + }, + { + name: "valid html with attributes", + input: `

text

`, + wantErr: false, + }, + { + name: "void elements br", + input: "

line1
line2

", + wantErr: false, + }, + { + name: "void elements img", + input: `test`, + wantErr: false, + }, + { + name: "void elements input", + input: ``, + wantErr: false, + }, + { + name: "self-closing syntax", + input: "
", + wantErr: false, + }, + { + name: "doctype", + input: "", + wantErr: false, + }, + { + name: "comment", + input: "
content
", + wantErr: false, + }, + { + name: "script content preservation", + input: ``, + wantErr: false, + }, + { + name: "style content preservation", + input: ``, + wantErr: false, + }, + { + name: "pre whitespace preservation", + input: "
  line1\n  line2
", + wantErr: false, + }, + { + name: "textarea whitespace preservation", + input: "", + wantErr: false, + }, + { + name: "malformed html unclosed tag", + input: "

unclosed", + wantErr: false, // HTML tokenizer handles malformed HTML gracefully + }, + { + name: "malformed html mismatched tags", + input: "

", + wantErr: false, // HTML tokenizer handles malformed HTML gracefully + }, + { + name: "boolean attributes", + input: ``, + wantErr: false, + }, + { + name: "multiple attributes", + input: `link`, + wantErr: false, + }, + { + name: "inline elements", + input: "

Text with bold and italic

", + wantErr: false, + }, + { + name: "empty input", + input: "", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(tt.input), p) + if (err != nil) != tt.wantErr { + t.Errorf("FormatHTML() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestFormatHTMLOutput(t *testing.T) { + input := "

text

" + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + if !strings.Contains(output, "") { + t.Errorf("output should contain , got: %s", output) + } + if !strings.Contains(output, "") { + t.Errorf("output should contain , got: %s", output) + } + if !strings.Contains(output, "text") { + t.Errorf("output should contain text, got: %s", output) + } +} + +func TestFormatHTMLIndentation(t *testing.T) { + input := "Test

content

" + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + + // Check that block elements start on new lines with proper indentation. + lines := strings.Split(output, "\n") + foundIndentedDiv := false + for _, line := range lines { + if strings.Contains(line, "
") && strings.HasPrefix(line, " ") { + foundIndentedDiv = true + break + } + } + if !foundIndentedDiv { + t.Errorf("expected indented
, got output:\n%s", output) + } +} + +func TestFormatHTMLDoctype(t *testing.T) { + input := "" + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + if !strings.Contains(output, "") { + t.Errorf("output should contain , got: %s", output) + } +} + +func TestFormatHTMLComment(t *testing.T) { + input := "
content
" + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + if !strings.Contains(output, "") { + t.Errorf("output should contain comment, got: %s", output) + } +} + +func TestFormatHTMLVoidElements(t *testing.T) { + tests := []struct { + name string + input string + check string + }{ + { + name: "br element", + input: "

line1
line2

", + check: "
", + }, + { + name: "hr element", + input: "

", + check: "
", + }, + { + name: "img element", + input: ``, + check: ``, + }, + { + name: "input element", + input: ``, + check: ``, + }, + { + name: "meta element", + input: ``, + check: ``, + }, + { + name: "link element", + input: ``, + check: ``, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(tt.input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + if !strings.Contains(output, tt.check) { + t.Errorf("output should contain %q, got: %s", tt.check, output) + } + // Void elements should not have closing tags. + tagName := strings.Split(strings.TrimPrefix(tt.check, "<"), " ")[0] + tagName = strings.TrimSuffix(tagName, ">") + closingTag := "" + if strings.Contains(output, closingTag) { + t.Errorf("output should not contain closing tag %s for void element, got: %s", closingTag, output) + } + }) + } +} + +func TestFormatHTMLPreservesRawText(t *testing.T) { + input := `` + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + // The raw content should be preserved. + if !strings.Contains(output, `if (x < 5 && y > 3)`) { + t.Errorf("script content should be preserved, got: %s", output) + } +} + +func TestFormatHTMLPreservesPreWhitespace(t *testing.T) { + input := "
  line1\n    line2
" + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + // The whitespace should be preserved. + if !strings.Contains(output, " line1") { + t.Errorf("pre whitespace should be preserved, got: %s", output) + } + if !strings.Contains(output, " line2") { + t.Errorf("pre whitespace should be preserved, got: %s", output) + } +} + +func TestFormatHTMLPlanExample(t *testing.T) { + input := `Test

Hello

Text with bold


` + expected := ` + + + Test + + +
+

Hello

+

Text with bold

+
+ +
+ + +` + p := core.NewHandle(core.ColorOff).Stderr() + err := FormatHTML([]byte(input), p) + if err != nil { + t.Fatalf("FormatHTML() error = %v", err) + } + + output := string(p.Bytes()) + if output != expected { + t.Errorf("FormatHTML() output mismatch.\nGot:\n%s\nExpected:\n%s", output, expected) + } +} + +func TestEscapeHTMLAttrValue(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "no escape needed", + input: "hello world", + want: "hello world", + }, + { + name: "with ampersand", + input: "foo & bar", + want: "foo & bar", + }, + { + name: "with less than", + input: "a < b", + want: "a < b", + }, + { + name: "with greater than", + input: "a > b", + want: "a > b", + }, + { + name: "with quotes", + input: `say "hello"`, + want: "say "hello"", + }, + { + name: "mixed special chars", + input: ``, + want: "<script>"alert('&')"</script>", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := core.NewHandle(core.ColorOff).Stderr() + escapeHTMLAttrValue(p, tt.input) + got := string(p.Bytes()) + if got != tt.want { + t.Errorf("escapeHTMLAttrValue() = %q, want %q", got, tt.want) + } + }) + } +}