-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathxml_helper.rb
More file actions
265 lines (210 loc) · 7.06 KB
/
xml_helper.rb
File metadata and controls
265 lines (210 loc) · 7.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# encoding: utf-8
require 'nokogiri'
require 'cgi'
require './str_helper.rb'
module XMLHelper
extend self # So methods don't have to be defined with self.method_name
# Parses an Android-style resources XML document into a flat Hash.
#
# xml - the XML source handed to Nokogiri::XML.
#
# Returns a Hash whose values are {'string' => text} records and whose keys are:
#   "name"            for each <string name="name">
#   "name[i]"         for the i-th element child of <string-array name="name">
#   "name[:quantity]" for each of zero/one/two/few/many/other under
#                     <plurals name="name">; a quantity with no matching child
#                     is stored as {'string' => nil}
def xml_to_str(xml)
  # Turns one element into its {'string' => text} record. Children are
  # serialised one by one (rather than using element.text) so inline markup
  # such as <b>/<i> survives; the explicit encoding keeps multi-byte
  # characters intact. Backslashes in front of quotes are dropped afterwards.
  extract = lambda do |node|
    if node.nil?
      {'string' => nil}
    else
      raw = node.children.each_with_object('') do |child, buffer|
        buffer << child.to_xml(:encoding => 'utf-8')
      end
      {'string' => StrHelper.clean(raw).gsub(/\\("|')/, '\1')}
    end
  end

  document = Nokogiri::XML(xml)
  result = {}

  document.xpath('//string').each do |string_node|
    result[string_node.attr('name')] = extract.call(string_node)
  end

  document.xpath('//string-array').each do |array_node|
    array_node.element_children.each_with_index do |item_node, index|
      result[array_node.attr('name') + "[#{index}]"] = extract.call(item_node)
    end
  end

  document.xpath('//plurals').each do |plural_node|
    %w(zero one two few many other).each do |quantity|
      item_node = plural_node.element_children.at("[@quantity=#{quantity}]")
      result[plural_node.attr('name') + "[:#{quantity}]"] = extract.call(item_node)
    end
  end

  result
end
# Serialises a strings hash into an Android-style <resources> XML document.
#
# strings       - Hash mapping keys to {'string' => text} records. A key
#                 without "[" becomes a <string>; "name[i]" becomes an <item>
#                 of <string-array name="name">; "name[:quantity]" becomes an
#                 <item quantity="..."> of <plurals name="name">.
# model_strings - Hash of the same shape. Its keys decide which entries are
#                 visited and in what order, and its text fills in the
#                 untranslated items of any string-array that has at least
#                 one translated item. Defaults to +strings+.
#
# Entries that are missing, or whose text is nil or empty, count as
# untranslated: plain strings and plural items are skipped, string-array
# items fall back to the model text so item positions stay aligned.
# Neither input hash is reassigned by this method.
#
# Returns the XML document as a String. If check_tags reports a problem for
# any value, returns instead a String made of "name:\n\t<error>\n" entries;
# callers have to tell the two kinds of result apart themselves.
def str_to_xml(strings, model_strings = nil)
  doc = Nokogiri::XML('')
  res = Nokogiri::XML::Node.new('resources', doc)
  doc.add_child(res)
  str_ars = {}
  str_pls = {}
  errors = {}
  model_strings ||= strings

  # nil entry, nil text and empty text all mean "nothing to emit".
  blank = lambda do |entry|
    entry.nil? || entry['string'].nil? || entry['string'].empty?
  end

  model_strings.keys.each do |key|
    value = strings[key]
    if key !~ /\[/ # string
      next if blank.call(value)
      e = check_tags(value['string'])
      errors[key] = e if e
      # Once anything has failed validation only error collection continues.
      next unless errors.empty?
      str = Nokogiri::XML::Node.new('string', doc)
      str['name'] = key
      str['formatted'] = 'false'
      str.content = str_hash_to_s(value)
      res.add_child(str)
    elsif key !~ /:/ # string-array
      name = key.split('[').first
      # The array is emitted when a node for it already exists, or when any
      # index the model knows about has text in +strings+. Each index is
      # looked up on its own, so a translated later item is enough.
      ar_present = !str_ars[name].nil?
      i = 0
      until ar_present || model_strings["#{name}[#{i}]"].nil?
        ar_present = !blank.call(strings["#{name}[#{i}]"])
        i += 1
      end
      next unless ar_present
      # Untranslated item of a present array: use the model's text, without
      # writing it back into the caller's hash.
      value = model_strings[key] if blank.call(value)
      text = (value && value['string']).to_s
      e = check_tags(text)
      errors[name] = e if e
      next unless errors.empty?
      str_ars[name] ||= Nokogiri::XML::Node.new('string-array', doc)
      str_ars[name]['name'] = name
      item = Nokogiri::XML::Node.new('item', doc)
      item.content = str_hash_to_s('string' => text)
      str_ars[name].add_child(item)
    else # plural
      next if blank.call(value)
      name = key.split('[').first
      quantity = key.split(':')[1].split(']').first
      e = check_tags(value['string'])
      errors[name] = e if e
      next unless errors.empty?
      str_pls[name] ||= Nokogiri::XML::Node.new('plurals', doc)
      str_pls[name]['name'] = name
      item = Nokogiri::XML::Node.new('item', doc)
      item['quantity'] = quantity
      item.content = str_hash_to_s(value)
      str_pls[name].add_child(item)
    end
  end

  unless errors.empty?
    return errors.map { |name, error| "#{name}:\n\t#{error}\n" }.join
  end

  # Arrays and plurals are appended after all plain strings.
  str_ars.each_value { |array_node| res.add_child(array_node) }
  str_pls.each_value { |plural_node| res.add_child(plural_node) }

  # Assigning through content= stores the text as a text node, so inline
  # markup is serialised as entities; unescaping the finished document turns
  # it back into real tags. NOTE(review): this also unescapes entities such
  # as &amp; that were meant literally — confirm that is acceptable.
  CGI.unescapeHTML(doc.to_xml(:encoding => 'utf-8'))
end
private
# Produces the text to write into an XML node for one string record.
#
# hash - a record whose 'string' entry holds the text.
#
# Returns the text after escape_quotes has been applied and every carriage
# return and line feed has been removed.
def str_hash_to_s(hash)
  escaped = escape_quotes(hash['string'])
  escaped.delete("\r\n")
end
# I couldn't figure out how to make a regex do this for me...
# (the hard part being: not escaping quotes that are within a tag)
# Returns a copy of +s+ in which every single or double quote that lies
# outside a <...> tag is preceded by a backslash. Quotes inside a tag (for
# example attribute values) are left alone.
#
# When StrHelper::quoted?(s) is true, a quote in the first or last position
# is not escaped (presumably quoted? reports that the whole string is wrapped
# in quotes — confirm against StrHelper).
#
# s - the String to escape. It is never modified, so escaping the same data
#     twice gives the same result and frozen strings are accepted.
#
# Returns a new String.
def escape_quotes(s)
  escaped = s.dup # insert below mutates; keep the caller's string intact
  i = 0
  len = escaped.length
  brackets = 0 # greater than zero while scanning inside a tag
  while i < len
    c = escaped[i]
    brackets += 1 if c == '<'
    brackets -= 1 if c == '>'
    if brackets < 1 && (c == "'" || c == '"')
      unless (i == 0 || i == len - 1) && StrHelper::quoted?(escaped)
        escaped.insert(i, '\\')
        i += 1   # step over the backslash that was just inserted
        len += 1 # the string grew by one character
      end
    end
    i += 1
  end
  escaped
end
# Checks that the inline markup in +s+ is balanced and simply formed.
#
# The scan is character based: "<" opens a tag, ">" closes it, a "/" directly
# after "<" marks an end tag, and everything after the first space inside a
# start tag is treated as its attribute text. Attribute text must consist of
# lower-case name="value" pairs (double quotes only), optionally followed by
# whitespace and a "/" for a self-closing tag. A tag whose ">" is directly
# preceded by "/" is self-closing and is not expected to be closed later.
# Characters outside tags are ignored, except that a ">" there is an error.
#
# s - the String to validate.
#
# Returns nil when no problem is found, otherwise a String describing the
# first problem encountered.
def check_tags(s)
  open_tags = []        # names of start tags still waiting for an end tag
  cur_tag = nil         # name of the tag being scanned; nil outside <...>
  cur_tag_extras = nil  # text after the first space in the current tag
  is_end_tag = false
  prev = nil            # previous character; nil at the start of the string

  s.each_char.with_index do |c, i|
    if c == '<'
      return "Unclosed open-bracket at index #{i}: <#{cur_tag}" if cur_tag
      cur_tag = ''
      cur_tag_extras = nil
      is_end_tag = false
    elsif c == '>'
      return "Close-bracket where it doesn't belong at index #{i}" unless cur_tag
      return "Empty tag at index #{i}" if cur_tag.empty?
      if is_end_tag
        # An end tag never carries attribute text: any space inside one has
        # already been rejected below.
        if cur_tag != open_tags.last
          return "Close tag </#{cur_tag}> does not match open tag <#{open_tags.last}> at index #{i}"
        end
        open_tags.pop
      else
        open_tags.push(cur_tag) unless prev == '/'
        # Anchored on the whole text so a single valid line cannot hide
        # junk on another line; trailing whitespace and "/" are allowed so
        # that "<br />" and "<b >" are accepted like "<br/>" and "<b>".
        if cur_tag_extras && cur_tag_extras !~ /\A(?:\s+[a-z]+="[^"]*")*\s*\/?\z/
          return "Bad attrubutes '#{cur_tag_extras}' in tag <#{cur_tag}> at index #{i}"
        end
      end
      cur_tag = nil
    elsif c == ' ' && cur_tag
      return "Space at begining of tag at index #{i}" if cur_tag.empty?
      return "Space in end tag '</#{cur_tag}' at index #{i}" if is_end_tag
      if cur_tag_extras
        cur_tag_extras << c
      else
        cur_tag_extras = ' ' # first space: the tag name is complete
      end
    elsif c == '/' && prev == '<'
      is_end_tag = true
    elsif cur_tag
      # Inside a tag: extend the attribute text if it has started,
      # otherwise the tag name.
      (cur_tag_extras || cur_tag) << c
    end
    prev = c
  end

  return "Unclosed tag: <#{open_tags.pop}>" unless open_tags.empty?
  return "Unclosed open-bracket: <#{cur_tag}" if cur_tag
  nil
end
end