diff --git a/cl/internal/convert/comments.go b/cl/internal/convert/comments.go index 8a634bf7..d911bd23 100644 --- a/cl/internal/convert/comments.go +++ b/cl/internal/convert/comments.go @@ -33,13 +33,54 @@ func NewCommentGroup(comments ...*goast.Comment) *goast.CommentGroup { func NewCommentGroupFromC(doc *ast.CommentGroup) *goast.CommentGroup { goDoc := &goast.CommentGroup{} - if doc != nil && doc.List != nil { - for _, comment := range doc.List { - goDoc.List = append(goDoc.List, - &goast.Comment{ - Slash: token.NoPos, Text: comment.Text, - }, - ) + if doc == nil || doc.List == nil { + return goDoc + } + + // Process comments, merging multi-line block comments into single nodes. + // Go's ast.Comment requires block comments (/* ... */) to be a single node, + // but line comments (// ...) should be separate nodes per line. + i := 0 + for i < len(doc.List) { + comment := doc.List[i] + text := strings.TrimRight(comment.Text, "\n") + + // Check if this is the start of a block comment + if strings.HasPrefix(text, "/*") { + // If the block comment is complete (contains */), add as single node + if strings.Contains(text, "*/") { + goDoc.List = append(goDoc.List, &goast.Comment{ + Slash: token.NoPos, Text: text, + }) + i++ + continue + } + + // Multi-line block comment: merge all lines until we find */ + var lines []string + lines = append(lines, text) + i++ + + for i < len(doc.List) { + nextText := strings.TrimRight(doc.List[i].Text, "\n") + lines = append(lines, nextText) + i++ + if strings.Contains(nextText, "*/") { + break + } + } + + // Join all lines with newlines to form complete block comment + mergedComment := strings.Join(lines, "\n") + goDoc.List = append(goDoc.List, &goast.Comment{ + Slash: token.NoPos, Text: mergedComment, + }) + } else { + // Line comment or other: add as-is (without trailing newline) + goDoc.List = append(goDoc.List, &goast.Comment{ + Slash: token.NoPos, Text: text, + }) + i++ } } return goDoc diff --git a/cl/internal/convert/comments_test.go b/cl/internal/convert/comments_test.go new file mode 100644 index 00000000..d5d04eef --- /dev/null +++ b/cl/internal/convert/comments_test.go @@ -0,0 +1,219 @@ +package convert_test + +import ( + "go/ast" + "go/parser" + "go/token" + "strings" + "testing" + + "github.com/goplus/gogen" + llcppgast "github.com/goplus/llcppg/ast" + "github.com/goplus/llcppg/cl/internal/convert" +) + +// TestCommentParsing tests the comment parsing and conversion logic. +// These atomic tests verify that comments are correctly converted from +// llcppg AST to Go AST following Go's ast.Comment specification: +// - For /* */ block comments: entire block must be a single Comment node +// - For // line comments: each line is a separate Comment node + +func TestBlockCommentSingleNode(t *testing.T) { + // A multi-line block comment should be a single ast.Comment node + blockComment := "/* Create an iterator for traversing a domain\n The domain NULL denotes the default domain */" + + // Create llcppg AST with single Comment node (correct approach) + llcppgDoc := &llcppgast.CommentGroup{ + List: []*llcppgast.Comment{ + {Text: blockComment}, + }, + } + + // Convert to Go AST + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + // Verify single Comment node + if len(goDoc.List) != 1 { + t.Errorf("Expected 1 Comment node, got %d", len(goDoc.List)) + } + if goDoc.List[0].Text != blockComment { + t.Errorf("Comment text mismatch.\nExpected: %q\nGot: %q", blockComment, goDoc.List[0].Text) + } + + // Verify the generated code is valid Go + assertValidGoCode(t, goDoc, "BlockCommentSingleNode") +} + +func TestBlockCommentSplitByNewlines_Merged(t *testing.T) { + // This test verifies that the conversion layer correctly merges + // split block comments back into a single node. + // The parser outputs multi-line block comments as separate lines with \n, + // and the conversion layer should merge them. + blockComment := "/* Create an iterator for traversing a domain\n The domain NULL denotes the default domain */" + + // Simulate what the parser outputs: split by newlines with \n at end + lines := strings.Split(blockComment, "\n") + var llcppgComments []*llcppgast.Comment + for _, line := range lines { + llcppgComments = append(llcppgComments, &llcppgast.Comment{Text: line + "\n"}) + } + llcppgDoc := &llcppgast.CommentGroup{List: llcppgComments} + + // Convert to Go AST - should merge into single node + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + // This should now produce 1 Comment node (merged) + if len(goDoc.List) != 1 { + t.Errorf("Expected 1 Comment node after merging, got %d", len(goDoc.List)) + } + + // Verify the merged comment is valid Go + assertValidGoCode(t, goDoc, "BlockCommentMerged") +} + +func TestLineCommentsSplitByNewlines(t *testing.T) { + // Line comments should be split by newlines, one Comment per line + // This is the correct behavior for // style comments + + // Create llcppg AST with separate Comment nodes for each line + llcppgDoc := &llcppgast.CommentGroup{ + List: []*llcppgast.Comment{ + {Text: "// Line 1"}, + {Text: "// Line 2"}, + {Text: "// Line 3"}, + }, + } + + // Convert to Go AST + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + // Verify three Comment nodes + if len(goDoc.List) != 3 { + t.Errorf("Expected 3 Comment nodes, got %d", len(goDoc.List)) + } + + // Verify the generated code is valid Go + assertValidGoCode(t, goDoc, "LineCommentsSplit") +} + +func TestSingleLineBlockComment(t *testing.T) { + // A single-line block comment should also be a single Comment node + blockComment := "/* Single line block comment */" + + llcppgDoc := &llcppgast.CommentGroup{ + List: []*llcppgast.Comment{ + {Text: blockComment}, + }, + } + + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + if len(goDoc.List) != 1 { + t.Errorf("Expected 1 Comment node, got %d", len(goDoc.List)) + } + if goDoc.List[0].Text != blockComment { + t.Errorf("Comment text mismatch.\nExpected: %q\nGot: %q", blockComment, goDoc.List[0].Text) + } + + assertValidGoCode(t, goDoc, "SingleLineBlockComment") +} + +func TestEmptyCommentGroup(t *testing.T) { + // Test nil and empty comment groups + t.Run("nil", func(t *testing.T) { + goDoc := convert.NewCommentGroupFromC(nil) + if goDoc == nil { + t.Error("Expected non-nil CommentGroup") + } + if len(goDoc.List) != 0 { + t.Errorf("Expected 0 Comment nodes, got %d", len(goDoc.List)) + } + }) + + t.Run("empty_list", func(t *testing.T) { + llcppgDoc := &llcppgast.CommentGroup{List: nil} + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + if goDoc == nil { + t.Error("Expected non-nil CommentGroup") + } + if len(goDoc.List) != 0 { + t.Errorf("Expected 0 Comment nodes, got %d", len(goDoc.List)) + } + }) +} + +func TestMixedComments(t *testing.T) { + // Test a group with both block and line comments + // In practice, this would be separate groups, but test the conversion anyway + llcppgDoc := &llcppgast.CommentGroup{ + List: []*llcppgast.Comment{ + {Text: "/* Block comment */"}, + {Text: "// Line comment"}, + }, + } + + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + if len(goDoc.List) != 2 { + t.Errorf("Expected 2 Comment nodes, got %d", len(goDoc.List)) + } + + assertValidGoCode(t, goDoc, "MixedComments") +} + +func TestBlockCommentWithSpecialChars(t *testing.T) { + // Test block comments with special characters + testCases := []struct { + name string + comment string + }{ + {"asterisks", "/* Comment with * asterisks * inside */"}, + {"slashes", "/* Comment with / slashes / inside */"}, + {"newlines_and_tabs", "/* Comment with\n\ttabs and\n\tnewlines */"}, + {"unicode", "/* Unicode: 中文, 日本語, émojis 🎉 */"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + llcppgDoc := &llcppgast.CommentGroup{ + List: []*llcppgast.Comment{ + {Text: tc.comment}, + }, + } + + goDoc := convert.NewCommentGroupFromC(llcppgDoc) + + if len(goDoc.List) != 1 { + t.Errorf("Expected 1 Comment node, got %d", len(goDoc.List)) + } + if goDoc.List[0].Text != tc.comment { + t.Errorf("Comment text mismatch.\nExpected: %q\nGot: %q", tc.comment, goDoc.List[0].Text) + } + }) + } +} + +// assertValidGoCode generates Go code with the comment and validates it +func assertValidGoCode(t *testing.T, commentGroup *ast.CommentGroup, funcName string) { + t.Helper() + + pkg := gogen.NewPackage("", "demo", nil) + fn := pkg.NewFunc(nil, funcName, nil, nil, false) + fn.SetComments(pkg, commentGroup) + fn.BodyStart(pkg).End() + + var buf strings.Builder + err := gogen.WriteTo(&buf, pkg, "") + if err != nil { + t.Fatalf("gogen.WriteTo failed: %v", err) + } + + code := buf.String() + t.Logf("Generated code:\n%s", code) + + fset := token.NewFileSet() + _, err = parser.ParseFile(fset, "generated.go", code, parser.ParseComments) + if err != nil { + t.Fatalf("Generated code is invalid Go: %v\nCode:\n%s", err, code) + } +} diff --git a/cl/internal/convert/gogen_test.go b/cl/internal/convert/gogen_test.go new file mode 100644 index 00000000..72d2a253 --- /dev/null +++ b/cl/internal/convert/gogen_test.go @@ -0,0 +1,166 @@ +package convert_test + +import ( + "go/ast" + "go/parser" + "go/token" + "strings" + "testing" + + "github.com/goplus/gogen" +) + +// TestMultiLineBlockComment tests the behavior of multi-line block comments +// with gogen's printer. This test verifies the FIXED comment parsing approach +// used in llcppg (_xtool/internal/parser/parser.go:ParseComment). +// +// Background: +// - Go's ast.Comment documentation states each Comment node represents +// "a single //-style or /*-style comment" +// - For /* */ comments, the entire block should be a single Comment node +// - The fix keeps block comments as single nodes instead of splitting by newlines +// +// This test should PASS with both gogen v1.19.7 and v1.20.2. +func TestMultiLineBlockComment(t *testing.T) { + // This is a typical multi-line C block comment from a header file + // Example from gettext: /* Create an iterator for traversing a domain + // The domain NULL denotes the default domain */ + rawComment := "/* Create an iterator for traversing a domain\n The domain NULL denotes the default domain */" + + t.Run("fixed_approach_single_comment_node", func(t *testing.T) { + // Fixed approach: keep block comment as single Comment node + // This is how ParseComment now works after the fix + pkg := gogen.NewPackage("", "demo", nil) + + commentList := []*ast.Comment{ + { + Slash: token.NoPos, + Text: rawComment, + }, + } + commentGroup := &ast.CommentGroup{List: commentList} + + fn := pkg.NewFunc(nil, "ExampleFunction", nil, nil, false) + fn.SetComments(pkg, commentGroup) + fn.BodyStart(pkg).End() + + var buf strings.Builder + err := gogen.WriteTo(&buf, pkg, "") + if err != nil { + t.Fatalf("gogen.WriteTo failed: %v", err) + } + + code := buf.String() + t.Logf("Generated code:\n%s", code) + + // Validate the generated code is valid Go + fset := token.NewFileSet() + _, err = parser.ParseFile(fset, "generated.go", code, parser.ParseComments) + if err != nil { + t.Fatalf("Generated code is invalid Go: %v\nCode:\n%s", err, code) + } + }) + + t.Run("line_comments_split_by_newlines", func(t *testing.T) { + // Line comments should be split by newlines (one Comment per line) + // This is the correct behavior for // style comments + pkg := gogen.NewPackage("", "demo", nil) + + commentList := []*ast.Comment{ + {Slash: token.NoPos, Text: "// Line 1 of comment"}, + {Slash: token.NoPos, Text: "// Line 2 of comment"}, + } + commentGroup := &ast.CommentGroup{List: commentList} + + fn := pkg.NewFunc(nil, "ExampleFunction", nil, nil, false) + fn.SetComments(pkg, commentGroup) + fn.BodyStart(pkg).End() + + var buf strings.Builder + err := gogen.WriteTo(&buf, pkg, "") + if err != nil { + t.Fatalf("gogen.WriteTo failed: %v", err) + } + + code := buf.String() + t.Logf("Generated code:\n%s", code) + + // Validate the generated code is valid Go + fset := token.NewFileSet() + _, err = parser.ParseFile(fset, "generated.go", code, parser.ParseComments) + if err != nil { + t.Fatalf("Generated code is invalid Go: %v\nCode:\n%s", err, code) + } + }) +} + +// TestParseCommentLogic tests the logic that should be implemented in ParseComment +// This simulates the fixed behavior of _xtool/internal/parser/parser.go:ParseComment +func TestParseCommentLogic(t *testing.T) { + // parseComment simulates the fixed ParseComment function + parseComment := func(rawComment string) []*ast.Comment { + // Block comment (/* ... */) - keep as single Comment node + if strings.HasPrefix(rawComment, "/*") { + return []*ast.Comment{{Text: rawComment}} + } + + // Line comments (// ...) - split by newlines + var comments []*ast.Comment + lines := strings.Split(rawComment, "\n") + for _, line := range lines { + line = strings.TrimRight(line, "\r") + if line != "" { + comments = append(comments, &ast.Comment{Text: line}) + } + } + return comments + } + + testCases := []struct { + name string + input string + expectedCount int + }{ + { + name: "single_line_block_comment", + input: "/* Single line block */", + expectedCount: 1, + }, + { + name: "multi_line_block_comment", + input: "/* Line 1\n Line 2\n Line 3 */", + expectedCount: 1, + }, + { + name: "single_line_comment", + input: "// Single line", + expectedCount: 1, + }, + { + name: "multi_line_line_comments", + input: "// Line 1\n// Line 2\n// Line 3", + expectedCount: 3, + }, + { + name: "block_comment_with_asterisks", + input: "/* Comment with * asterisks * inside */", + expectedCount: 1, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + comments := parseComment(tc.input) + if len(comments) != tc.expectedCount { + t.Errorf("Expected %d comments, got %d", tc.expectedCount, len(comments)) + } + + // For block comments, verify the text is preserved + if strings.HasPrefix(tc.input, "/*") { + if len(comments) > 0 && comments[0].Text != tc.input { + t.Errorf("Block comment text mismatch.\nExpected: %q\nGot: %q", tc.input, comments[0].Text) + } + } + }) + } +} diff --git a/go.mod b/go.mod index 39000489..cbd71bad 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/goplus/llcppg go 1.23.0 require ( - github.com/goplus/gogen v1.19.7 + github.com/goplus/gogen v1.20.2 github.com/goplus/lib v0.3.1 github.com/goplus/llgo v0.12.0 github.com/goplus/mod v0.19.0 diff --git a/go.sum b/go.sum index 0f00402a..f481f001 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/goplus/gogen v1.19.7 h1:0i30on0GwtYIJ+D9/I5QujswBU+mnnmNewoRk/uRVkw= -github.com/goplus/gogen v1.19.7/go.mod h1:owX2e1EyU5WD+Nm6oH2m/GXjLdlBYcwkLO4wN8HHXZI= +github.com/goplus/gogen v1.20.2 h1:c9wm7NzjWSrncbtH+lz4jM2j31p+6JTji8cjF1K79qg= +github.com/goplus/gogen v1.20.2/go.mod h1:87ZJD1mdljXx2pkvBrMGynGUz6m08brj9p6xhb5aq2Y= github.com/goplus/lib v0.3.1 h1:Xws4DBVvgOMu58awqB972wtvTacDbk3nqcbHjdx9KSg= github.com/goplus/lib v0.3.1/go.mod h1:SgJv3oPqLLHCu0gcL46ejOP3x7/2ry2Jtxu7ta32kp0= github.com/goplus/llgo v0.12.0 h1:UhbmwR+9fSy1y944rp6fPkSP39n4YhH4TpAN2QJ15ns=