From 61a28e4c268dfa29ae345b15dcf09ec4404d7247 Mon Sep 17 00:00:00 2001 From: "AYANOKOUZI, Ryuunosuke" Date: Wed, 23 May 2012 17:42:02 +0900 Subject: [PATCH 1/4] unicode line folding --- lib/Text/FormatTable.pm | 124 +++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 64 deletions(-) diff --git a/lib/Text/FormatTable.pm b/lib/Text/FormatTable.pm index 5c2a7af..a006658 100644 --- a/lib/Text/FormatTable.pm +++ b/lib/Text/FormatTable.pm @@ -4,6 +4,10 @@ use Carp; use strict; use warnings; use vars qw($VERSION); +use Unicode::GCString; +use Text::LineFold; +use Encode; +my $utf8 = find_encoding('utf8'); $VERSION = '1.03'; @@ -13,11 +17,29 @@ Text::FormatTable - Format text tables =head1 SYNOPSIS - my $table = Text::FormatTable->new('r|l'); - $table->head('a', 'b'); + #!/usr/bin/perl -w + use Encode; + use Text::FormatTable; + + my $ascii = find_encoding('ascii'); + my $utf8 = find_encoding('utf8'); + my $eucjp = find_encoding('euc-jp'); + + my $text_ja = 'utf8 encoded text from outside perl'; + $text_ja = $utf8->decode($text_ja); + my $text_en = 'ascii encoded text from outside perl'; + $text_en = $utf8->decode($text_en); + + my $table = Text::FormatTable->new('| l | l |'); + $table->rule(); + $table->head( 'en', 'ja' ); $table->rule('='); - $table->row('c', 'd'); - print $table->render(20); + $table->row( $text_en, $text_ja ); + $table->rule(); + print $eucjp->encode( $table->render(60) ); + + exit; + __END__ =head1 DESCRIPTION @@ -32,24 +54,44 @@ Methods: =cut +sub linefold_columns_min { + my $str = shift; + + return 0 if ( columns($str) == 0 ); + + my $line_fold_columns_min = 0; + my $lf = Text::LineFold->new( ColMax => 1 ); + foreach ( split /\n/, + $lf->fold( '', '', utf8::is_utf8($str) ? $utf8->encode($str) : $str ) ) + { + my $columns = columns($_); + $line_fold_columns_min = $columns + if ( $line_fold_columns_min < $columns ); + } + + return $line_fold_columns_min; +} + +sub columns { + my $str = shift; + $str = utf8::is_utf8($str) ? $utf8->encode($str) : $str; + $str = $utf8->decode($str); + return Unicode::GCString->new($str)->columns(); +} + # Remove ANSI color sequences when calculating length sub _uncolorized_length($) { my $str = shift; $str =~ s/\e \[ [^m]* m//xmsg; - return length $str; + return columns $str; } # minimal width of $1 if word-wrapped sub _min_width($) { my $str = shift; - my $min; - for my $s (split(/\s+/,$str)) { - my $l = _uncolorized_length $s; - $min = $l if not defined $min or $l > $min; - } - return $min ? $min : 1; + return linefold_columns_min $str; } # width of $1 if not word-wrapped @@ -71,63 +113,17 @@ sub _max($$) sub _wrap($$) { my ($width, $text) = @_; - my @lines = split(/\n/, $text); my @w = (); - for my $l (@lines) { - push @w, @{_wrap_line($width, $l)}; - } + my $lf = Text::LineFold->new( ColMax => $width ); + @w = split /\n/, + $utf8->decode( + $lf->fold( + '', '', utf8::is_utf8($text) ? $utf8->encode($text) : $text + ) + ); return \@w; } -sub _wrap_line($$) -{ - my ($width, $text) = @_; - my $width_m1 = $width-1; - my @t = ($text); - while(1) { - my $t = pop @t; - my $l = _uncolorized_length $t; - if($l <= $width){ - # last line is ok => done - push @t, $t; - return \@t; - } - elsif($t =~ /^(.{0,$width_m1}\S)\s+(\S.*?)$/) { - # farest space < width - push @t, $1; - push @t, $2; - } - elsif($t =~ /(.{$width,}?\S)\s+(\S.*?)$/) { - # nearest space > width - if ( _uncolorized_length $1 > $width_m1 ) - { - # hard hyphanation - my $left = substr($1,0,$width); - my $right= substr($1,$width); - - push @t, $left; - push @t, $right; - push @t, $2; - } - else - { - push @t, $1; - push @t, $2; - } - } - else { - # hard hyphanation - my $left = substr($t,0,$width); - my $right= substr($t,$width); - - push @t, $left; - push @t, $right; - return \@t; - } - } - return \@t; -} - # render left-box $2 with width $1 sub _l_box($$) { From 7e849659ea3b224e5471f0fba54e0b5ae49680d7 Mon Sep 17 00:00:00 2001 From: "AYANOKOUZI, Ryuunosuke" Date: Wed, 23 May 2012 17:49:47 +0900 Subject: [PATCH 2/4] fix typo --- lib/Text/FormatTable.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Text/FormatTable.pm b/lib/Text/FormatTable.pm index a006658..6c229cc 100644 --- a/lib/Text/FormatTable.pm +++ b/lib/Text/FormatTable.pm @@ -28,7 +28,7 @@ Text::FormatTable - Format text tables my $text_ja = 'utf8 encoded text from outside perl'; $text_ja = $utf8->decode($text_ja); my $text_en = 'ascii encoded text from outside perl'; - $text_en = $utf8->decode($text_en); + $text_en = $ascii->decode($text_en); my $table = Text::FormatTable->new('| l | l |'); $table->rule(); From 9a7d97d5f57055c2affc0b36735677e39dc3e8d8 Mon Sep 17 00:00:00 2001 From: "AYANOKOUZI, Ryuunosuke" Date: Fri, 25 May 2012 13:52:18 +0900 Subject: [PATCH 3/4] use Unicode::LineBreak for calculating min column width --- lib/Text/FormatTable.pm | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/lib/Text/FormatTable.pm b/lib/Text/FormatTable.pm index 6c229cc..364ae52 100644 --- a/lib/Text/FormatTable.pm +++ b/lib/Text/FormatTable.pm @@ -5,9 +5,9 @@ use strict; use warnings; use vars qw($VERSION); use Unicode::GCString; +use Unicode::LineBreak; use Text::LineFold; use Encode; -my $utf8 = find_encoding('utf8'); $VERSION = '1.03'; @@ -54,28 +54,23 @@ Methods: =cut -sub linefold_columns_min { +sub _linefold_columns_min { my $str = shift; - - return 0 if ( columns($str) == 0 ); - - my $line_fold_columns_min = 0; - my $lf = Text::LineFold->new( ColMax => 1 ); - foreach ( split /\n/, - $lf->fold( '', '', utf8::is_utf8($str) ? $utf8->encode($str) : $str ) ) - { - my $columns = columns($_); - $line_fold_columns_min = $columns - if ( $line_fold_columns_min < $columns ); + my $min; + my $lb = Unicode::LineBreak->new( ColMax => 1 ); + foreach ($lb->break($str)) { + my $l = $_->columns; + $min = $l if not defined $min or $l > $min; } - - return $line_fold_columns_min; + return $min ? $min : 1; } -sub columns { - my $str = shift; - $str = utf8::is_utf8($str) ? $utf8->encode($str) : $str; - $str = $utf8->decode($str); +sub _columns { + my $str = scalar shift; + + return 0 if ( !defined $str || $str eq '' ); + + $str = decode_utf8($str) unless utf8::is_utf8($str); return Unicode::GCString->new($str)->columns(); } @@ -84,14 +79,14 @@ sub _uncolorized_length($) { my $str = shift; $str =~ s/\e \[ [^m]* m//xmsg; - return columns $str; + return _columns $str; } # minimal width of $1 if word-wrapped sub _min_width($) { my $str = shift; - return linefold_columns_min $str; + return _linefold_columns_min $str; } # width of $1 if not word-wrapped @@ -116,9 +111,9 @@ sub _wrap($$) my @w = (); my $lf = Text::LineFold->new( ColMax => $width ); @w = split /\n/, - $utf8->decode( + decode_utf8( $lf->fold( - '', '', utf8::is_utf8($text) ? $utf8->encode($text) : $text + '', '', utf8::is_utf8($text) ? encode_utf8($text) : $text ) ); return \@w; From e9c290de199a6a49a4e4f107701ac1d5779384b3 Mon Sep 17 00:00:00 2001 From: "AYANOKOUZI, Ryuunosuke" Date: Sat, 21 Jun 2014 09:30:13 +0900 Subject: [PATCH 4/4] add test cases with CJK text, revert SYNOPSIS --- lib/Text/FormatTable.pm | 26 ++-------- test.pl | 110 +++++++++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 36 deletions(-) diff --git a/lib/Text/FormatTable.pm b/lib/Text/FormatTable.pm index 364ae52..ccc4856 100644 --- a/lib/Text/FormatTable.pm +++ b/lib/Text/FormatTable.pm @@ -17,29 +17,11 @@ Text::FormatTable - Format text tables =head1 SYNOPSIS - #!/usr/bin/perl -w - use Encode; - use Text::FormatTable; - - my $ascii = find_encoding('ascii'); - my $utf8 = find_encoding('utf8'); - my $eucjp = find_encoding('euc-jp'); - - my $text_ja = 'utf8 encoded text from outside perl'; - $text_ja = $utf8->decode($text_ja); - my $text_en = 'ascii encoded text from outside perl'; - $text_en = $ascii->decode($text_en); - - my $table = Text::FormatTable->new('| l | l |'); - $table->rule(); - $table->head( 'en', 'ja' ); + my $table = Text::FormatTable->new('r|l'); + $table->head('a', 'b'); $table->rule('='); - $table->row( $text_en, $text_ja ); - $table->rule(); - print $eucjp->encode( $table->render(60) ); - - exit; - __END__ + $table->row('c', 'd'); + print $table->render(20); =head1 DESCRIPTION diff --git a/test.pl b/test.pl index 94d3d07..6d90cac 100644 --- a/test.pl +++ b/test.pl @@ -5,6 +5,7 @@ use strict; use warnings; +use utf8; { my $table = Text::FormatTable->new('r| l l'); @@ -17,20 +18,19 @@ my $is = $table->render(15); my $shouldbe = <<'END'; - a| b c -================= -this a| oh, yep - test,| cool, -a nice| a - test| test! -------+---------- - you| yes, z - mean| it - it's| is. -really| - a| - test?| -================= + a| b c +==================== + this a| oh, yep +test, a| cool, a + nice| test! + test| +-------+------------ + you| yes, it z + mean| is. + it's| + really| +a test?| +==================== END ok($is, $shouldbe); @@ -72,3 +72,85 @@ END my $output = $table->render(); ok(not defined $warning); } + +# line folding using Text::LineFold +{ + my @format = (' 14l ', ' 20L ', ' 30r ', ' 40R '); + my $table = Text::FormatTable->new( '|' . ( join '|', @format ) . '|' ); + my @text = (); + push @text, +q(This document is intended to give you a quick overview of the Perl programming language, along with pointers to further documentation. It is intended as a "bootstrap" guide for those who are new to the language, and provides just enough information for you to be able to read other peoples' Perl and understand roughly what it's doing, or write your own simple scripts.); + push @text, +q(この文書は Perl プログラミング言語の簡単な概要を伝えて、更なる 文書へのポインタを示すことを目的としています。 これはこの言語を知らない人のためへの「自習」ガイドを目的としていて、 他の人の Perl を読んで何をしているかを大まかに理解したり、 自分自身で簡単なスクリプトを書くことができるようになるために 十分な情報を提供しています。); + $table->rule('-'); + $table->head(@format); + foreach my $text (@text) { + $table->rule('-'); + $table->row( map { $text } ( 0 .. $#format ) ); + } + $table->rule('-'); + my $is = $table->render(); + my $shouldbe = <<'END'; ++----------------+----------------------+--------------------------------+------------------------------------------+ +| 14l | 20L | 30r | 40R | ++----------------+----------------------+--------------------------------+------------------------------------------+ +| This document | | This document is intended to | | +| is intended to | | give you a quick overview of | | +| give you a | | the Perl programming language, | | +| quick overview | | along with pointers to further | | +| of the Perl | | documentation. It is intended | | +| programming | | as a "bootstrap" guide for | | +| language, | | those who are new to the | | +| along with | | language, and provides just | | +| pointers to | | enough information for you to | | +| further | | be able to read other peoples' | | +| documentation. | | Perl and understand roughly | | +| It is intended | This document is | what it's doing, or write your | | +| as a | intended to give you | own simple scripts. | | +| "bootstrap" | a quick overview of | | | +| guide for | the Perl programming | | | +| those who are | language, along with | | | +| new to the | pointers to further | | | +| language, and | documentation. It is | | | +| provides just | intended as a | | | +| enough | "bootstrap" guide | | | +| information | for those who are | | | +| for you to be | new to the language, | | This document is intended to give you a | +| able to read | and provides just | | quick overview of the Perl programming | +| other peoples' | enough information | | language, along with pointers to further | +| Perl and | for you to be able | | documentation. It is intended as a | +| understand | to read other | | "bootstrap" guide for those who are new | +| roughly what | peoples' Perl and | | to the language, and provides just | +| it's doing, or | understand roughly | | enough information for you to be able to | +| write your own | what it's doing, or | | read other peoples' Perl and understand | +| simple | write your own | | roughly what it's doing, or write your | +| scripts. | simple scripts. | | own simple scripts. | ++----------------+----------------------+--------------------------------+------------------------------------------+ +| この文書は | | この文書は Perl プログラミング | | +| Perl プログラ | | 言語の簡単な概要を伝えて、更な | | +| ミング言語の簡 | | る 文書へのポインタを示すこと | | +| 単な概要を伝え | | を目的としています。 これはこ | | +| て、更なる 文 | | の言語を知らない人のためへの | | +| 書へのポインタ | | 「自習」ガイドを目的としてい | | +| を示すことを目 | | て、 他の人の Perl を読んで何 | | +| 的としていま | この文書は Perl プロ | をしているかを大まかに理解した | | +| す。 これはこ | グラミング言語の簡単 | り、 自分自身で簡単なスクリプ | | +| の言語を知らな | な概要を伝えて、更な | トを書くことができるようになる | | +| い人のためへの | る 文書へのポインタ | ために 十分な情報を提供してい | | +| 「自習」ガイド | を示すことを目的とし | ます。 | | +| を目的としてい | ています。 これはこ | | | +| て、 他の人の | の言語を知らない人の | | | +| Perl を読んで | ためへの「自習」ガイ | | | +| 何をしているか | ドを目的としていて、 | | この文書は Perl プログラミング言語の簡単 | +| を大まかに理解 | 他の人の Perl を読ん | | な概要を伝えて、更なる 文書へのポインタ | +| したり、 自分 | で何をしているかを大 | | を示すことを目的としています。 これはこ | +| 自身で簡単なス | まかに理解したり、 | | の言語を知らない人のためへの「自習」ガイ | +| クリプトを書く | 自分自身で簡単なスク | | ドを目的としていて、 他の人の Perl を読 | +| ことができるよ | リプトを書くことがで | | んで何をしているかを大まかに理解したり、 | +| うになるために | きるようになるために | | 自分自身で簡単なスクリプトを書くことがで | +| 十分な情報を提 | 十分な情報を提供して | | きるようになるために 十分な情報を提供し | +| 供しています。 | います。 | | ています。 | ++----------------+----------------------+--------------------------------+------------------------------------------+ +END + ok( $is, $shouldbe ); +}