-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCharsetConvertor.php
More file actions
158 lines (134 loc) · 4.05 KB
/
CharsetConvertor.php
File metadata and controls
158 lines (134 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
<?php
/**
 * Character encoding converter.
 *
 * Converts data between ANSI, UTF-16 (little endian), UTF-16 Big Endian,
 * UTF-8 and UTF-8 with BOM. Every conversion goes through UTF-8 as the
 * intermediate representation. "ANSI" is handled as GBK by this class.
 *
 * @author fdipzone
 * @DateTime 2023-06-29 23:55:15
 *
 */
class CharsetConvertor
{
    // Identifiers of the charsets this class can convert between
    const ANSI = 'ansi';
    const UTF8 = 'utf8';
    const UTF8BOM = 'utf8-bom';
    const UTF16 = 'utf16';
    const UTF16BE = 'utf16-be';

    // Byte order marks expected on input and written on output
    const BOM_UTF8 = "\xef\xbb\xbf";
    const BOM_UTF16LE = "\xff\xfe";
    const BOM_UTF16BE = "\xfe\xff";

    /**
     * Convert data from one supported charset to another.
     *
     * @author fdipzone
     * @DateTime 2023-07-01 23:14:19
     *
     * @param string $str         Data to convert
     * @param string $in_charset  Charset of the input data (one of the class constants)
     * @param string $out_charset Charset of the output data (one of the class constants)
     * @return string Converted data
     * @throws \Exception If $str is the empty string, a charset is not supported,
     *                    or the underlying iconv() call fails
     */
    public static function convert(string $str, string $in_charset, string $out_charset): string
    {
        // Strict comparison: empty() would also reject the valid payload "0".
        if ($str === '') {
            throw new \Exception('str is empty');
        }

        if (!self::validCharset($in_charset)) {
            throw new \Exception('in charset type not supported');
        }

        if (!self::validCharset($out_charset)) {
            throw new \Exception('out charset type not supported');
        }

        // Step 1: normalise the input to BOM-less UTF-8 (no-op for plain utf8 input).
        $utf8_str = self::convertToUtf8($str, $in_charset);

        // Step 2: encode the UTF-8 data as the target charset (no-op for plain utf8 output).
        return self::convertFromUtf8($utf8_str, $out_charset);
    }

    /**
     * Check whether a charset identifier is supported.
     *
     * @author fdipzone
     * @DateTime 2023-07-01 23:07:03
     *
     * @param string $charset Charset identifier
     * @return bool True when $charset equals one of the class charset constants
     */
    private static function validCharset(string $charset): bool
    {
        $supported = [
            self::ANSI,
            self::UTF8,
            self::UTF8BOM,
            self::UTF16,
            self::UTF16BE,
        ];

        return in_array($charset, $supported, true);
    }

    /**
     * Convert data in the given charset to BOM-less UTF-8.
     *
     * A leading byte order mark is removed only when the expected one is
     * actually present, so BOM-less input does not lose its first bytes.
     *
     * @author fdipzone
     * @DateTime 2023-07-01 23:19:22
     *
     * @param string $str     Data
     * @param string $charset Charset of the data
     * @return string UTF-8 encoded data
     * @throws \Exception If iconv() fails
     */
    private static function convertToUtf8(string $str, string $charset): string
    {
        switch ($charset) {
            case self::ANSI:
                return self::iconvOrFail('GBK', 'UTF-8//IGNORE', $str);

            case self::UTF8BOM:
                return self::stripBom($str, self::BOM_UTF8);

            case self::UTF16:
                return self::iconvOrFail(
                    'UTF-16le',
                    'UTF-8//IGNORE',
                    self::stripBom($str, self::BOM_UTF16LE)
                );

            case self::UTF16BE:
                return self::iconvOrFail(
                    'UTF-16be',
                    'UTF-8//IGNORE',
                    self::stripBom($str, self::BOM_UTF16BE)
                );

            default:
                // Plain utf8 input is passed through untouched.
                return $str;
        }
    }

    /**
     * Convert UTF-8 data to the given charset, prepending the BOM where the
     * target charset calls for one (utf8-bom, utf16, utf16-be).
     *
     * @author fdipzone
     * @DateTime 2023-07-01 23:19:55
     *
     * @param string $utf8_str UTF-8 encoded data
     * @param string $charset  Target charset
     * @return string Data encoded in the target charset
     * @throws \Exception If iconv() fails
     */
    private static function convertFromUtf8(string $utf8_str, string $charset): string
    {
        switch ($charset) {
            case self::ANSI:
                return self::iconvOrFail('UTF-8', 'GBK//IGNORE', $utf8_str);

            case self::UTF8BOM:
                return self::BOM_UTF8 . $utf8_str;

            case self::UTF16:
                return self::BOM_UTF16LE . self::iconvOrFail('UTF-8', 'UTF-16le//IGNORE', $utf8_str);

            case self::UTF16BE:
                return self::BOM_UTF16BE . self::iconvOrFail('UTF-8', 'UTF-16be//IGNORE', $utf8_str);

            default:
                // Plain utf8 output is passed through untouched.
                return $utf8_str;
        }
    }

    /**
     * Remove $bom from the start of $str when, and only when, it is present.
     *
     * @param string $str Data that may start with a byte order mark
     * @param string $bom Byte order mark to look for
     * @return string $str without the leading $bom
     */
    private static function stripBom(string $str, string $bom): string
    {
        $bom_len = strlen($bom);

        if (strncmp($str, $bom, $bom_len) !== 0) {
            return $str;
        }

        // Cast guards against substr() returning false on PHP 7 when nothing remains.
        return (string) substr($str, $bom_len);
    }

    /**
     * Run iconv() and turn its false-on-failure result into an exception, so a
     * failed conversion is not silently coerced into an empty string.
     *
     * @param string $from Source encoding name
     * @param string $to   Target encoding name (may carry a //IGNORE suffix)
     * @param string $str  Data to convert
     * @return string Converted data
     * @throws \Exception If iconv() returns false
     */
    private static function iconvOrFail(string $from, string $to, string $str): string
    {
        $converted = iconv($from, $to, $str);

        if ($converted === false) {
            throw new \Exception('charset conversion failed');
        }

        return $converted;
    }
}