C0 code coverage information
Code reported as executed by Ruby looks like this...
and this: this line is also marked as covered.
Lines considered as run by rcov, but not reported by Ruby, look like this,
and this: these lines were inferred by rcov (using simple heuristics).
Finally, here's a line marked as not executed.
Name |
Total lines |
Lines of code |
Total coverage |
Code coverage |
magic_xml.rb
|
1379
|
964
|
|
|
1 # Needed for parsing
2 require 'rexml/parsers/baseparser'
3 # Needed for fetching XMLs from the Internet
4 require 'uri'
5 require 'net/http'
6
7 # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
8
# Extensions to Symbol used by the XML library:
# * symbols are ordered by their names,
# * a Symbol used as a pattern (case/when, ===) matches an XML element
#   whose tag name is that symbol.
class Symbol
  include Comparable

  # Compare two symbols by their string form.
  # Raises ArgumentError when +other+ is not a Symbol.
  def <=>(other)
    unless other.is_a?(Symbol)
      raise ArgumentError, "comparison of #{self.class} with #{other.class} failed"
    end
    to_s <=> other.to_s
  end

  alias_method :eqeqeq_before_magic_xml, :===

  # :foo === node is true when node is an XML element named :foo.
  # Every other argument is handed to the original Symbol#===.
  def ===(*args, &blk)
    if args.size >= 1 && args[0].is_a?(XML)
      self == args[0].name
    else
      eqeqeq_before_magic_xml(*args, &blk)
    end
  end
end
27
# A Hash used as a pattern matches an XML element when every value in the
# hash matches (via ===) what the element returns for the corresponding key.
class Hash
  alias_method :eqeqeq_before_magic_xml, :===

  # {:color => "red"} === node checks node[:color] against "red".
  # Values may themselves be patterns (Regexp, Class, ...), because the
  # comparison is done with ===. Non-XML arguments go to the original ===.
  def ===(*args, &blk)
    if args.size >= 1 && args[0].is_a?(XML)
      all? { |k, v| v === args[0][k] }
    else
      eqeqeq_before_magic_xml(*args, &blk)
    end
  end
end
38
class String
  # Escape string for output as XML text (< > &)
  def xml_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;"}
    gsub(/[<>&]/) { |ch| replacements[ch] }
  end

  # Escape characters for output as XML attribute values (< > & ' ")
  def xml_attr_escape
    replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
    gsub(/[<>&'"]/) { |ch| replacements[ch] }
  end

  # Unescape entities
  # Supports:
  # * Full set of HTML-compatible named entities
  # * Decimal entities &#1234;
  # * Hex entities &#xA0B1;
  #
  # +extra_entities+ is consulted for names missing from the built-in table.
  # It may be a Proc (called with the entity name) or anything indexable by
  # the entity name, such as a Hash. The value it yields may be a String
  # (used verbatim) or an Integer code point.
  #
  # Raises RuntimeError ("Unknown escape NAME") for a named entity that
  # neither the table nor +extra_entities+ knows.
  def xml_unescape(extra_entities=nil)
    @@xhtml_entity_replacements ||= {
      'nbsp' => 160,
      'iexcl' => 161,
      'cent' => 162,
      'pound' => 163,
      'curren' => 164,
      'yen' => 165,
      'brvbar' => 166,
      'sect' => 167,
      'uml' => 168,
      'copy' => 169,
      'ordf' => 170,
      'laquo' => 171,
      'not' => 172,
      'shy' => 173,
      'reg' => 174,
      'macr' => 175,
      'deg' => 176,
      'plusmn' => 177,
      'sup2' => 178,
      'sup3' => 179,
      'acute' => 180,
      'micro' => 181,
      'para' => 182,
      'middot' => 183,
      'cedil' => 184,
      'sup1' => 185,
      'ordm' => 186,
      'raquo' => 187,
      'frac14' => 188,
      'frac12' => 189,
      'frac34' => 190,
      'iquest' => 191,
      'Agrave' => 192,
      'Aacute' => 193,
      'Acirc' => 194,
      'Atilde' => 195,
      'Auml' => 196,
      'Aring' => 197,
      'AElig' => 198,
      'Ccedil' => 199,
      'Egrave' => 200,
      'Eacute' => 201,
      'Ecirc' => 202,
      'Euml' => 203,
      'Igrave' => 204,
      'Iacute' => 205,
      'Icirc' => 206,
      'Iuml' => 207,
      'ETH' => 208,
      'Ntilde' => 209,
      'Ograve' => 210,
      'Oacute' => 211,
      'Ocirc' => 212,
      'Otilde' => 213,
      'Ouml' => 214,
      'times' => 215,
      'Oslash' => 216,
      'Ugrave' => 217,
      'Uacute' => 218,
      'Ucirc' => 219,
      'Uuml' => 220,
      'Yacute' => 221,
      'THORN' => 222,
      'szlig' => 223,
      'agrave' => 224,
      'aacute' => 225,
      'acirc' => 226,
      'atilde' => 227,
      'auml' => 228,
      'aring' => 229,
      'aelig' => 230,
      'ccedil' => 231,
      'egrave' => 232,
      'eacute' => 233,
      'ecirc' => 234,
      'euml' => 235,
      'igrave' => 236,
      'iacute' => 237,
      'icirc' => 238,
      'iuml' => 239,
      'eth' => 240,
      'ntilde' => 241,
      'ograve' => 242,
      'oacute' => 243,
      'ocirc' => 244,
      'otilde' => 245,
      'ouml' => 246,
      'divide' => 247,
      'oslash' => 248,
      'ugrave' => 249,
      'uacute' => 250,
      'ucirc' => 251,
      'uuml' => 252,
      'yacute' => 253,
      'thorn' => 254,
      'yuml' => 255,
      'quot' => 34,
      'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
      'amp' => 38,
      'lt' => 60,
      'gt' => 62,
      'OElig' => 338,
      'oelig' => 339,
      'Scaron' => 352,
      'scaron' => 353,
      'Yuml' => 376,
      'circ' => 710,
      'tilde' => 732,
      'ensp' => 8194,
      'emsp' => 8195,
      'thinsp' => 8201,
      'zwnj' => 8204,
      'zwj' => 8205,
      'lrm' => 8206,
      'rlm' => 8207,
      'ndash' => 8211,
      'mdash' => 8212,
      'lsquo' => 8216,
      'rsquo' => 8217,
      'sbquo' => 8218,
      'ldquo' => 8220,
      'rdquo' => 8221,
      'bdquo' => 8222,
      'dagger' => 8224,
      'Dagger' => 8225,
      'permil' => 8240,
      'lsaquo' => 8249,
      'rsaquo' => 8250,
      'euro' => 8364,
      'fnof' => 402,
      'Alpha' => 913,
      'Beta' => 914,
      'Gamma' => 915,
      'Delta' => 916,
      'Epsilon' => 917,
      'Zeta' => 918,
      'Eta' => 919,
      'Theta' => 920,
      'Iota' => 921,
      'Kappa' => 922,
      'Lambda' => 923,
      'Mu' => 924,
      'Nu' => 925,
      'Xi' => 926,
      'Omicron' => 927,
      'Pi' => 928,
      'Rho' => 929,
      'Sigma' => 931,
      'Tau' => 932,
      'Upsilon' => 933,
      'Phi' => 934,
      'Chi' => 935,
      'Psi' => 936,
      'Omega' => 937,
      'alpha' => 945,
      'beta' => 946,
      'gamma' => 947,
      'delta' => 948,
      'epsilon' => 949,
      'zeta' => 950,
      'eta' => 951,
      'theta' => 952,
      'iota' => 953,
      'kappa' => 954,
      'lambda' => 955,
      'mu' => 956,
      'nu' => 957,
      'xi' => 958,
      'omicron' => 959,
      'pi' => 960,
      'rho' => 961,
      'sigmaf' => 962,
      'sigma' => 963,
      'tau' => 964,
      'upsilon' => 965,
      'phi' => 966,
      'chi' => 967,
      'psi' => 968,
      'omega' => 969,
      'thetasym' => 977,
      'upsih' => 978,
      'piv' => 982,
      'bull' => 8226,
      'hellip' => 8230,
      'prime' => 8242,
      'Prime' => 8243,
      'oline' => 8254,
      'frasl' => 8260,
      'weierp' => 8472,
      'image' => 8465,
      'real' => 8476,
      'trade' => 8482,
      'alefsym' => 8501,
      'larr' => 8592,
      'uarr' => 8593,
      'rarr' => 8594,
      'darr' => 8595,
      'harr' => 8596,
      'crarr' => 8629,
      'lArr' => 8656,
      'uArr' => 8657,
      'rArr' => 8658,
      'dArr' => 8659,
      'hArr' => 8660,
      'forall' => 8704,
      'part' => 8706,
      'exist' => 8707,
      'empty' => 8709,
      'nabla' => 8711,
      'isin' => 8712,
      'notin' => 8713,
      'ni' => 8715,
      'prod' => 8719,
      'sum' => 8721,
      'minus' => 8722,
      'lowast' => 8727,
      'radic' => 8730,
      'prop' => 8733,
      'infin' => 8734,
      'ang' => 8736,
      'and' => 8743,
      'or' => 8744,
      'cap' => 8745,
      'cup' => 8746,
      'int' => 8747,
      'there4' => 8756,
      'sim' => 8764,
      'cong' => 8773,
      'asymp' => 8776,
      'ne' => 8800,
      'equiv' => 8801,
      'le' => 8804,
      'ge' => 8805,
      'sub' => 8834,
      'sup' => 8835,
      'nsub' => 8836,
      'sube' => 8838,
      'supe' => 8839,
      'oplus' => 8853,
      'otimes' => 8855,
      'perp' => 8869,
      'sdot' => 8901,
      'lceil' => 8968,
      'rceil' => 8969,
      'lfloor' => 8970,
      'rfloor' => 8971,
      'lang' => 9001,
      'rang' => 9002,
      'loz' => 9674,
      'spades' => 9824,
      'clubs' => 9827,
      'hearts' => 9829,
      'diams' => 9830,
    }
    # Entity names may contain digits after the first letter (sup2, frac12,
    # there4, ...), so the name pattern must allow them.
    gsub(/&(?:([a-zA-Z][a-zA-Z0-9]*)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
      name, decimal, hexadecimal = $1, $2, $3
      if name
        v = @@xhtml_entity_replacements[name]
        # Nonstandard entity
        unless v
          if extra_entities.is_a? Proc
            v = extra_entities.call(name)
          # Well, we expect a Hash here, but any container will do.
          # As long as it's not a nil.
          elsif extra_entities
            v = extra_entities[name]
          end
        end
        raise "Unknown escape #{name}" unless v
      elsif decimal
        v = decimal.to_i
      else
        v = hexadecimal.hex
      end
      # v can be a String or an Integer
      if v.is_a? String then v else [v].pack('U') end
    }
  end

  # Parse this string as an XML document (delegates to XML.parse).
  def xml_parse
    XML.parse(self)
  end
end
338
class File
  # Parse the XML readable from this file object by handing the object
  # itself to XML.parse.
  def xml_parse
    XML.parse(self)
  end
end
344
# Lets an Array of nodes be queried like a node: the query is run on every
# XML element in the array and the results are concatenated in order.
# Non-XML entries (e.g. Strings) are skipped.
class Array
  # children of any element
  def children(*args, &blk)
    res = []
    # concat appends in place; `res += ...` would copy res on every step.
    each { |c| res.concat(c.children(*args, &blk)) if c.is_a?(XML) }
    res
  end

  # descendants of any element
  def descendants(*args, &blk)
    res = []
    each { |c| res.concat(c.descendants(*args, &blk)) if c.is_a?(XML) }
    res
  end
end
363
# Methods of Enumerable.
# It is not easy to design good methods, because XML
# is not really "a container", it just acts as one sometimes.
# Generally:
# * Methods that return nil should work
# * Methods that return an element should work
# * Methods that return a container should return XML container, not Array
# * Conversion methods should convert
#
# FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
class XML
  include Enumerable
  # Default any? is ok
  # Default all? is ok

  # Iterate over children, possibly with a selector.
  # The selector and the block are passed straight to #children;
  # the receiver itself is returned.
  def each(*selector, &blk)
    children(*selector, &blk)
    self
  end

  # Return a copy of this element whose contents are only its XML children,
  # ordered by the given block. Non-XML children are dropped from the copy.
  def sort_by(*args, &blk)
    dup do
      elements = @contents.select { |c| c.is_a?(XML) }
      @contents = elements.sort_by(*args, &blk)
    end
  end

  # Return a copy of this element with all children (of any kind)
  # ordered by the given block.
  def children_sort_by(*args, &blk)
    dup do
      @contents = @contents.sort_by(*args, &blk)
    end
  end

  # Return a copy of this element with its children sorted with Array#sort.
  #
  # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
  # Use sort_by instead.
  #
  # Unless you define your own XML#<=> operator, or do something equally weird.
  def sort(*args, &blk)
    dup do
      @contents = @contents.sort(*args, &blk)
    end
  end

  #collect/map
  #detect/find
  #each_cons
  #each_slice
  #each_with_index
  #to_a
  #entries
  #enum_cons
  #enum_slice
  #enum
  # grep
  # include?/member?
  # inject
  # max/min
  # max_by/min_by - Ruby 1.9
  # partition
  # reject
  # sort
  # sort_by
  # to_set
  # zip
  # And Enumerable::Enumerator-generating methods
end
428
# Class methods
class XML
  # XML.foo! == xml!(:foo)
  # XML.foo == xml(:foo)
  #
  # NOTE(review): the bang form calls xml!, which must be callable from the
  # class context; it is not defined in this part of the file - confirm.
  def self.method_missing(meth, *args, &blk)
    if meth.to_s =~ /^(.*)!$/
      xml!($1.to_sym, *args, &blk)
    else
      XML.new(meth, *args, &blk)
    end
  end

  # Read file and parse.
  # +file+ is either a file name (String) or an already opened stream.
  # A file opened here is closed again once parsing has finished;
  # a stream passed in by the caller is left open.
  def self.from_file(file)
    return File.open(file) { |f| parse(f) } if file.is_a? String
    parse(file)
  end

  # Fetch URL and parse
  # Supported:
  # http://.../
  # https://.../
  # file:foo.xml
  # string:<foo/>
  #
  # Raises RuntimeError for any other protocol.
  def self.from_url(url)
    # \A (not ^) so that only the beginning of the whole string selects the
    # protocol, even when the payload spans several lines.
    if url =~ /\Astring:(.*)\z/m
      parse($1)
    elsif url =~ /\Afile:(.*)\z/m
      from_file($1)
    elsif url =~ /\Ahttp(s?):/
      ssl = ($1 == "s")
      u = URI.parse(url)
      # request_uri is path plus query, and "/" when the URL has no path.
      req = Net::HTTP::Get.new(u.request_uri)
      if u.userinfo
        username, passwd = u.userinfo.split(/:/, 2)
        req.basic_auth username, passwd
      end
      http = Net::HTTP.new(u.host, u.port)
      if ssl
        # NOTE: HTTPS needs Ruby's OpenSSL support to be installed.
        require 'net/https'
        http.use_ssl = true
        # NOTE(review): certificate verification is switched off, so the
        # server's identity is not checked. Kept for backward compatibility;
        # consider OpenSSL::SSL::VERIFY_PEER.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      res = http.start { |conn| conn.request(req) }
      # TODO: Throw a more meaningful exception
      parse(res.body)
    else
      raise "URL protocol #{url} not supported (http, https, file, string are supported)"
    end
  end

  # Like CDuce load_xml
  # The path can be:
  # * file handler
  # * URL (a string with :)
  # * file name (a string without :)
  def self.load(obj)
    if obj.is_a? String
      if obj.include? ":"
        from_url(obj)
      else
        from_file(obj)
      end
    else
      parse(obj)
    end
  end

  # Parse XML in mixed stream/tree mode
  # Basically the idea is that every time we get start element,
  # we ask the block what to do about it.
  # If it wants a tree below it, it should call node.complete!
  # If a tree was requested, elements below the current one
  # are *not* processed. If it wasn't, they are.
  #
  # For example:
  # <foo><bar/></foo><foo2/>
  # yield <foo> ... </foo>
  # .complete! called
  # process <foo2> next
  #
  # But:
  # <foo><bar/></foo><foo2/>
  # yield <foo> ... </foo>
  # .complete! not called
  # process <bar> next
  #
  # FIXME: yielded values are not reusable for now
  # FIXME: make more object-oriented
  def self.parse_as_twigs(stream)
    parser = REXML::Parsers::BaseParser.new stream
    while true
      event = parser.pull
      case event[0]
      when :start_element
        attrs = {}
        event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape }
        node = XML.new(event[1].to_sym, attrs, *event[3..-1])

        # Give this one node a complete! method that pulls the rest of its
        # subtree from the parser, at most once.
        class << node
          attr_accessor :do_complete
          def complete!
            if @do_complete
              @do_complete.call
              @do_complete = nil
            end
          end
        end
        node.do_complete = proc { parse_subtree(node, parser) }

        yield(node)
        # It's too late, complete! shouldn't do anything now
        node.do_complete = nil
      when :end_document
        return
      else
        # FIXME: Do the right thing.
        # For now, ignore *everything* else
        # This is totally incorrect, user might want to
        # see text, comments and stuff like that anyway
      end
    end
  end

  # Pull events from +parser+ and attach them below +start_node+ until the
  # end tag matching +start_node+ has been consumed. Returns nil.
  # Raises RuntimeError if the document ends first.
  # Basically it's a copy of self.parse, ugly ...
  def self.parse_subtree(start_node, parser)
    stack = [start_node]
    while true
      event = parser.pull
      case event[0]
      when :start_element
        attrs = {}
        event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape }
        node = XML.new(event[1].to_sym, attrs, *event[3..-1])
        stack[-1] << node
        stack << node
      when :end_element
        stack.pop
        return if stack.empty?
      # Needs unescaping
      when :text
        stack[-1] << event[1].xml_unescape
      # CDATA is already unescaped
      when :cdata
        stack[-1] << event[1]
      when :end_document
        raise "Parse error: end_document inside a subtree, tags are not balanced"
      else
        # Declarations, doctype, processing instructions, comments, entity
        # and notation events, and anything unknown are ignored.
      end
    end
  end

  # Parse XML using REXML. Available options:
  # * :extra_entities => Proc or Hash (default = nil)
  # * :remove_pretty_printing => true/false (default = false)
  # * :comments => true/false (default = false)
  # * :pi => true/false (default = false)
  # * :normalize => true/false (default = false) - normalize
  # * :multiple_roots => true/false (default=false) - document
  #   can have any number of roots (instead of one).
  #   Return all in an array instead of root/nil.
  #   Also include non-elements (String/PI/Comment) in the return set !!!
  #
  # Raises RuntimeError for non-whitespace text, CDATA, comments or
  # processing instructions outside the root in single-root mode
  # (the latter two only when the matching option is on).
  #
  # FIXME: :comments/:pi will break everything
  # if there are comments/PIs outside document root.
  # Now PIs are outside the document root more often than not,
  # so we're pretty much screwed here.
  #
  # FIXME: Integrate all kinds of parse, and make them support extra options
  #
  # FIXME: Benchmark normalize!
  #
  # FIXME: Benchmark dup-based Enumerable methods
  #
  # FIXME: Make it possible to include bogus XML_Document superparent,
  # and to make it support out-of-root PIs/Comments
  def self.parse(stream, options={})
    extra_entities = options[:extra_entities]

    parser = REXML::Parsers::BaseParser.new stream
    # stack[0] is a plain Array collecting the roots; every later entry is
    # the currently open element at that depth.
    stack = [[]]

    while true
      event = parser.pull
      case event[0]
      when :start_element
        attrs = {}
        event[2].each { |k, v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
        stack << XML.new(event[1].to_sym, attrs, event[3..-1])
        stack[-2] << stack[-1]
      when :end_element
        stack.pop
      # Needs unescaping
      when :text
        e = event[1].xml_unescape(extra_entities)
        # Either inside root or in multi-root mode
        if stack.size > 1 || options[:multiple_roots]
          stack[-1] << e
        elsif event[1] !~ /\S/
          # Ignore out-of-root whitespace in single-root mode
        else
          raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
        end
      # CDATA is already unescaped
      when :cdata
        e = event[1]
        if stack.size > 1 || options[:multiple_roots]
          stack[-1] << e
        else
          raise "CDATA out of the document root"
        end
      when :comment
        next unless options[:comments]
        e = XML_Comment.new(event[1])
        if stack.size > 1 || options[:multiple_roots]
          stack[-1] << e
        else
          # FIXME: Ugly !
          raise "Comments out of the document root"
        end
      when :processing_instruction
        # FIXME: Real PI node
        next unless options[:pi]
        e = XML_PI.new(event[1], event[2])
        if stack.size > 1 || options[:multiple_roots]
          stack[-1] << e
        else
          # FIXME: Ugly !
          raise "Processing instruction out of the document root"
        end
      when :end_document
        break
      else
        # xmldecl, doctype, element/attlist/notation/entity declarations and
        # unknown events are ignored.
      end
    end
    roots = stack[0]

    # In multi-root mode the roots can include plain Strings, which have
    # neither of these methods - only touch roots that do.
    if options[:remove_pretty_printing]
      roots.each { |root| root.remove_pretty_printing! if root.respond_to?(:remove_pretty_printing!) }
    end
    # :remove_pretty_printing does :normalize anyway
    if options[:normalize]
      roots.each { |root| root.normalize! if root.respond_to?(:normalize!) }
    end
    if options[:multiple_roots]
      roots
    else
      roots[0]
    end
  end

  # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
  def self.parse_sequence(stream, options={})
    o = options.dup
    o[:multiple_roots] = true
    parse(stream, o)
  end

  # Renormalize a string containing XML document
  def self.renormalize(stream)
    parse(stream).to_s
  end

  # Renormalize a string containing a sequence of XML documents
  # and strings
  # XML.renormalize_sequence("<hello />, <world></world>!") =>
  # "<hello/>, <world/>!"
  def self.renormalize_sequence(stream)
    # join, not to_s: Array#to_s only concatenates on Ruby 1.8.
    parse_sequence(stream).join
  end
end
753
754 # Instance methods (other than those of Enumerable)
755 class XML
756 attr_accessor :name, :attrs, :contents
757
# Build an element. Accepted call shapes:
# * XML.new
# * XML.new(:tag_symbol)
# * XML.new(:tag_symbol, {attributes})
# * XML.new(:tag_symbol, "children", "more", XML.new(...))
# * XML.new(:tag_symbol, {attributes}, "and", "children")
# * XML.new(:tag_symbol) { monadic code }
# * XML.new(:tag_symbol, {attributes}) { monadic code }
#
# Children and a block can be combined too, although the two styles are
# normally not mixed:
# * XML.new(:tag_symbol, "children") { and some monadic code }
# * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
#
# Attribute values are converted to strings.
def initialize(*args, &blk)
  @attrs = {}
  @contents = []
  @name = args.empty? ? nil : args.shift
  if args.first.is_a?(Hash)
    # Going through []= converts the values and guarantees that the
    # caller's hash is *not* shared with this element.
    args.shift.each { |key, value| self[key] = value }
  end
  # << expands Arrays, so the remaining arguments can be passed as one.
  self << args
  # FIXME: We'd rather not have people say @name = :foo there :-)
  instance_eval(&blk) if blk
end
792
# Serialize the element and everything below it.
# Attributes are written sorted, in single quotes; String children are
# escaped as XML text, other children are asked for their own to_s.
# An element without children is written as <name/>.
def to_s
  attr_text = @attrs.sort.map { |k, v| " #{k}='#{v.xml_attr_escape}'" }.join
  opening = "<#{@name}" + attr_text
  return opening + "/>" if @contents.size == 0
  body = @contents.map { |x| x.is_a?(String) ? x.xml_escape : x.to_s }.join
  opening + ">" + body + "</#{name}>"
end
802
# Like to_s, but by default the children are abbreviated to "...".
# This is a reasonable format for irb and debugging.
# +include_children+ is the number of levels of children to print in full:
# inspect(2) shows two levels, and so on.
def inspect(include_children=0)
  opening = "<#{@name}" + @attrs.sort.map { |k, v| " #{k}='#{v.xml_attr_escape}'" }.join
  if @contents.size == 0
    opening + "/>"
  elsif include_children == 0
    opening + ">...</#{name}>"
  else
    shown = @contents.map do |x|
      x.is_a?(String) ? x.xml_escape : x.inspect(include_children - 1)
    end
    opening + ">" + shown.join + "</#{name}>"
  end
end
816
# Read an attribute.
# A key starting with "@" is a pseudoattribute and reads a child instead:
# img[:@x] == img.child(:x).text # or nil if there isn't any.
def [](key)
  key_name = key.to_s
  return @attrs[key] unless key_name[0] == ?@
  c = child(key_name[1..-1].to_sym)
  c ? c.text : nil
end
833
# Set an attribute.
# The value is converted with to_s, so you can say:
# img[:x] = 200
# A key starting with "@" is a pseudoattribute:
# foo[:@bar] = "x"
# replaces the contents of the first <bar> child with "x", or appends a
# new <bar>x</bar> child when there is none.
def []=(key, value)
  key_name = key.to_s
  text = value.to_s
  return @attrs[key] = text unless key_name[0] == ?@
  tag = key_name[1..-1].to_sym
  existing = child(tag)
  return self << XML.new(tag, text) unless existing
  existing.contents = [text]
end
852
# Append children and return self, so calls can be chained.
# * a single child:           self << xml(...)   or   self << "foo"
# * nothing at all:           self << nil
# * several, nested or not:   self << [a, b, c]   or   self << [a, [b, c], d]
def <<(cnt)
  return self if cnt.nil?
  if cnt.is_a?(Array)
    # Recurse so that nested arrays are flattened.
    cnt.each { |elem| self << elem }
  else
    @contents << cnt
  end
  self
end
873
# Equality test, works as if XMLs were normalized, so:
# XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
#
# Two elements are equal when their names and attributes are equal and
# their contents are equal after joining adjacent Strings and dropping
# empty ones. Returns false for anything that is not an XML.
def ==(x)
  return false unless x.is_a? XML
  return false unless name == x.name && attrs == x.attrs
  # Now the hard part, strings can be split in different ways
  # empty string children are possible etc.
  self_i = 0
  othr_i = 0
  while self_i != contents.size || othr_i != x.contents.size
    # Ignore ""s
    if contents[self_i].is_a?(String) && contents[self_i] == ""
      self_i += 1
      next
    end
    if x.contents[othr_i].is_a?(String) && x.contents[othr_i] == ""
      othr_i += 1
      next
    end

    # If one is finished and the other contains non-empty elements,
    # they are not equal
    return false if self_i == contents.size || othr_i == x.contents.size

    # Are they both Strings ?
    # Strings can be divided in different ways, and calling normalize!
    # here would be rather expensive, so walk both text runs in parallel:
    # a and b are the not-yet-matched parts of the current chunk on each side.
    if contents[self_i].is_a?(String) && x.contents[othr_i].is_a?(String)
      a = contents[self_i]
      b = x.contents[othr_i]
      self_i += 1
      othr_i += 1
      while a != "" || b != ""
        if a == b
          a = ""
          b = ""
        elsif a.size > b.size && a[0, b.size] == b
          # b is used up and a still has text left. If the other side has
          # no further String to continue with, its text run is shorter.
          a = a[b.size..-1]
          return false unless x.contents[othr_i].is_a? String
          b = x.contents[othr_i]
          othr_i += 1
        elsif b.size > a.size && b[0, a.size] == a
          # Mirror image of the branch above.
          b = b[a.size..-1]
          return false unless contents[self_i].is_a? String
          a = contents[self_i]
          self_i += 1
        else
          return false
        end
      end
      next
    end

    # OK, so at least one of them is not a String.
    # They may both be XMLs, XML_PIs or XML_Comments, or a String facing
    # a non-String. Contents are not checked for legality here, so
    # whatever == the two objects implement decides.
    return false unless contents[self_i] == x.contents[othr_i]
    self_i += 1
    othr_i += 1
  end
  true
end
949
alias_method :real_method_missing, :method_missing
# Monadic builder interface: calling any undefined method whose name ends
# in "!" appends a new child element named after the method (without the
# "!"), built from the given arguments and block:
#   node.foo!(...)   is   node << XML.new(:foo, ...)
# Every other undefined method goes to the original method_missing.
def method_missing(meth, *args, &blk)
  tag = meth.to_s[/^(.*)!$/, 1]
  return real_method_missing(meth, *args, &blk) unless tag
  self << XML.new(tag.to_sym, *args, &blk)
end
960
# Run a block in the context of this element (via instance_eval), which
# makes the monadic interface available inside it:
# * node.exec! { foo!; bar! }
# is equivalent to
# * node << xml(:foo) << xml(:bar)
# Returns whatever the block returns.
def exec!(&blk)
  instance_eval(&blk)
end
968
# Select a subtree
# NOTE: start/end are found by object identity !
# They have to be the very same objects, not just equal ones !
# <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
# returns
# <foo><b/><c/></foo>
# start and end and their descendants are not included in
# the result tree.
# Either start or end can be nil.
# * If both start and end are nil, return whole tree.
# * If start is nil, return subtree up to range_end.
# * If start is not inside the tree, return nil.
# * If end is nil, return subtree from start
# * If end is not inside the tree, return subtree from start.
# * If end is before or below start, or they're the same node, the result is unspecified.
# * if end comes directly after start, or as first node when start==nil, return path reaching there.
#
# +end_reached_cb+ is for the recursive calls: it is called when the end
# node is found at or below this element.
def range(range_start, range_end, end_reached_cb=nil)
  # With no start node the range is open from the very beginning.
  result = (range_start == nil) ? XML.new(name, attrs) : nil
  @contents.each do |c|
    end_found = false
    if range_end && c.equal?(range_end)
      end_found = true
    elsif range_start && c.equal?(range_start)
      # start reached ! Collect from the next child on.
      result = XML.new(name, attrs)
    elsif c.is_a?(XML)
      on_end = lambda { end_found = true }
      if result
        # Already started: take the child up to the end node, if it is in there.
        result.add! c.range(nil, range_end, on_end)
      else
        # Not started yet: the start may be somewhere below this child.
        below = c.range(range_start, range_end, on_end)
        result = XML.new(name, attrs, below) if below
      end
    elsif result
      # String/XML_PI/XML_Comment: can be collected, but cannot start a range.
      result.add! c
    end
    if end_found
      end_reached_cb.call if end_reached_cb
      break
    end
  end
  return result
end
1032
# XML#subsequence is similar to XML#range, but instead of a
# trimmed subtree it returns a list of elements.
# The same elements are included in both cases, but here
# we do not include any parents !
#
# <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
# <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
#
# <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
# <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
#
# <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
# <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
#
# And we return [], not nil if nothing matches
#
# +start_seen_cb+ / +end_seen_cb+ are for the recursive calls: they are
# called when the start / end node is found at or below this element, so
# that every enclosing level learns about it however deep the node is.
def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
  result = []
  start_seen = range_start.nil?
  @contents.each do |c|
    if range_end && range_end.equal?(c)
      end_seen_cb.call if end_seen_cb
      break
    end
    if range_start && range_start.equal?(c)
      start_seen = true
      start_seen_cb.call if start_seen_cb
      next
    end
    if c.is_a?(XML)
      end_below = false
      on_end = lambda { end_below = true }
      if start_seen
        result.concat(c.subsequence(nil, range_end, nil, on_end))
      else
        # The start may be below this child. Pass the news up as well,
        # otherwise levels further out would never begin collecting.
        on_start = lambda {
          start_seen = true
          start_seen_cb.call if start_seen_cb
        }
        result.concat(c.subsequence(range_start, range_end, on_start, on_end))
      end
      if end_below
        # Same for the end: the levels further out have to stop too.
        end_seen_cb.call if end_seen_cb
        break
      end
    elsif start_seen
      # String/XML_PI/XML_Comment: collected, but cannot start a subsequence
      result << c
    end
  end
  # Include starting tag if it was right from the range_start
  # Otherwise, return just the raw sequence
  result = [XML.new(@name, @attrs, result)] if range_start == nil
  return result
end
1083
# Pattern matching for a few reasonable kinds of pattern:
# * Symbol - compared with the element's name
# * Regexp - matched against the element's text
# * anything else (Hash, Pattern_any, Pattern_all) - asked via pattern === self
def =~(pattern)
  return @name == pattern if pattern.is_a?(Symbol)
  return text =~ pattern if pattern.is_a?(Regexp)
  pattern === self
end
1094
# Get rid of pretty-printing whitespace. Also normalizes the XML.
# Elements whose name is included in +exceptions+ are left as they are,
# together with everything below them.
def remove_pretty_printing!(exceptions=nil)
  normalize!
  real_remove_pretty_printing!(exceptions)
  normalize!
end

# Recursive worker for remove_pretty_printing!.
# normalize! is already recursive, so the public method calls it once at
# the top level and this helper avoids the extra calls.
# String children are edited in place: one leading and one trailing run
# of whitespace is removed and every remaining run becomes a single space.
def real_remove_pretty_printing!(exceptions=nil)
  return if exceptions && exceptions.include?(@name)
  each do |c|
    case c
    when String
      c.sub!(/^\s+/, "")
      c.sub!(/\s+$/, "")
      c.gsub!(/\s+/, " ")
    when XML_PI, XML_Comment
      # left untouched
    else
      c.real_remove_pretty_printing!(exceptions)
    end
  end
end

protected :real_remove_pretty_printing!
1119
# Add pretty-printing whitespace. Also normalizes the XML.
def add_pretty_printing!
  normalize!
  real_add_pretty_printing!
  normalize!
end

# Recursive worker for add_pretty_printing!.
# +indent+ is the whitespace already in front of this element. Every child
# is put on a line of its own, one step deeper than +indent+, and the
# closing tag goes back onto a line at +indent+. Elements without
# children are left alone.
def real_add_pretty_printing!(indent = "")
  return if @contents.empty?
  each do |c|
    if c.is_a? XML
      c.real_add_pretty_printing!(indent + " ")
    elsif c.is_a? String
      # Re-indent the line breaks that are already inside the text.
      c.gsub!(/\n\s*/, "\n#{indent} ")
    end
  end
  padded = []
  @contents.each { |c| padded << "\n#{indent} " << c }
  padded << "\n#{indent}"
  @contents = padded
end

protected :real_add_pretty_printing!
1140
alias_method :raw_dup, :dup
# Copy this element. Two things make it more than a plain dup:
# * the attribute hash is copied and every child is dup'ed in turn, so the
#   copy does not share its tree with the original
#   (the attribute values themselves stay shared - ugly),
# * an optional block is instance_eval'ed on the copy, so you can write:
#   * node.dup{ @name = :foo }
#   * node.dup{ self[:color] = "blue" }
def dup(&blk)
  copy = raw_dup
  copy.attrs = copy.attrs.dup
  copy.contents = copy.contents.map { |child_node| child_node.dup }
  copy.instance_eval(&blk) if blk
  copy
end
1156
1157
# Append each argument, converted with to_s, as a String child.
def text!(*args)
  args.each do |piece|
    self << piece.to_s
  end
end

# Append a new XML child; the arguments and the block go to XML.new.
def xml!(*args, &blk)
  new_child = XML.new(*args, &blk)
  @contents << new_child
end

# add! is another name for <<.
alias_method :add!, :<<
1168
# Normalize in place, recursively: adjacent String children are joined
# into one and empty Strings are dropped. Non-String children are asked
# to normalize! themselves.
def normalize!
  merged = []
  @contents.each do |c|
    unless c.is_a?(String)
      c.normalize!
      merged.push c
      next
    end
    next if c == ""
    if merged.last.is_a?(String)
      # Build a new String; the original children are not modified.
      merged[-1] = merged.last + c
    else
      merged.push c
    end
  end
  @contents = merged
end
1187
# Return the text below the node with all XML tags stripped:
# "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
# returns "Hello, world!"
# Children that are neither XML nor String (XML_PI/XML_Comment) are ignored.
def text
  @contents.inject("") do |collected, c|
    if c.is_a?(XML)
      collected << c.text
    elsif c.is_a?(String)
      collected << c
    else
      collected
    end
  end
end
1202
# Equivalent to node.children(pat, *rest)[0], but stops at the
# first match instead of collecting them all.
# Returns nil if there aren't any matching children.
def child(pat = nil, *rest)
  children(pat, *rest) { |match| return match }
  nil
end
1211
# Equivalent to node.descendants(pat, *rest)[0], but stops at the
# first match instead of collecting them all.
# Returns nil if there aren't any matching descendants.
def descendant(pat = nil, *rest)
  descendants(pat, *rest) { |match| return match }
  nil
end
1220
# XML#children(pattern, more_patterns)
# Return all children of the node matching the pattern (compared with
# ===). A nil pattern matches every child, Strings included.
# Also:
# * children(:a, :b) == children(:a).children(:b)
# * children(:a, :*, :c) == children(:a).descendants(:c)
#
# If a block is given, it is run on every element of the result.
#
# When more patterns follow, only XML children are searched further:
# Strings, processing instructions and comments have no children, so
# they simply contribute nothing (instead of raising NoMethodError).
def children(pat = nil, *rest, &blk)
  # :* switches from "direct children" to "any depth"
  return descendants(*rest, &blk) if pat == :*
  res = []
  @contents.each do |c|
    next unless pat.nil? || pat === c
    if rest.empty?
      res << c
      yield c if block_given?
    elsif c.is_a? XML
      res += c.children(*rest, &blk)
    end
  end
  res
end
1241
# * XML#descendants
# * XML#descendants(pattern)
# * XML#descendants(pattern, more_patterns)
#
# Return all descendants of the node matching the pattern (compared
# with ===). If pattern==nil, simply return all descendants.
# Optionally run a block on each of them if a block was given.
# If pattern==nil, also match Strings !
#
# When more patterns follow, only XML nodes are searched further:
# Strings, processing instructions and comments have no children, so
# they simply contribute nothing (instead of raising NoMethodError).
def descendants(pat = nil, *rest, &blk)
  res = []
  @contents.each do |c|
    if pat.nil? || pat === c
      if rest.empty?
        res << c
        yield c if block_given?
      elsif c.is_a? XML
        res += c.children(*rest, &blk)
      end
    end
    # Whether or not c itself matched, keep looking below it
    res += c.descendants(pat, *rest, &blk) if c.is_a? XML
  end
  res
end
1267
# Change elements based on pattern.
#
# If the node itself matches pat (checked with =~), the result is
# whatever the block returns for it. Otherwise a new node with the same
# name and attributes is built; XML children are deep_map'ed
# recursively, all other children are added as they are.
def deep_map(pat, &blk)
  return yield(self) if self =~ pat

  rebuilt = XML.new(name, attrs)
  each do |c|
    rebuilt << (c.is_a?(XML) ? c.deep_map(pat, &blk) : c)
  end
  rebuilt
end
1284
# FIXME: do we want a shallow or a deep copy here ?
# Map children, but leave the name/attributes.
#
# Builds a new node with the same name and attributes. Every child
# that matches pat (checked with =~), or simply every child when no
# pattern is given, is replaced by the block's result for it;
# the remaining children are added as they are.
def map(pat = nil)
  mapped = XML.new(name, attrs)
  each do |c|
    selected = !pat || c =~ pat
    mapped << (selected ? yield(c) : c)
  end
  mapped
end
1298 end
1299
# Processing instruction node.
#
# FIXME: Is this even sane ?
# * What about escaping and all that stuff ?
# * Rest of the code assumes that everything is either XML or String
class XML_PI
  # c and t are stored as given; both end up, unescaped, in #to_s
  def initialize(c, t)
    @c, @t = c, t
  end

  # Render as <?...?> with c immediately followed by t
  def to_s
    "<?#{@c}#{@t}?>"
  end
end
1312
# Comment node.
#
# FIXME: Is this even sane ?
# * What about escaping and all that stuff ?
# * Rest of the code assumes that everything is either XML or String
# * There are some limitations on where one can put -s in the comment. Do not overdo.
class XML_Comment
  # c is the comment body; it is stored as given and emitted unescaped
  def initialize(c)
    @c = c
  end

  # Render as <!--...-->
  def to_s
    "<!--#{@c}-->"
  end
end
1325
# Syntactic sugar for XML.new:
# all arguments and the block are passed on unchanged,
# and the freshly built node is returned.
def xml(*args, &blk)
  XML.new(*args, &blk)
end
1330
# xml! in XML { ... } - context adds node to parent
# xml! in main context builds the node with xml(...), prints it
# and returns it anyway
def xml!(*args, &blk)
  xml(*args, &blk).tap { |node| print node }
end
1338
# Perl 6 is supposed to have native support for something like that.
# Constructor takes multiple patterns. The object matches if they all match.
#
# Usage:
#   case foo
#   when all(:foo, {:color => 'blue'}, /Hello/)
#     print foo
#   end
class Patterns_all
  def initialize(*patterns)
    @patterns = patterns
  end

  # True unless some pattern fails to === obj; patterns are tried in
  # order and checking stops at the first failure.
  # With no patterns at all, everything matches.
  def ===(obj)
    @patterns.each do |pattern|
      return false unless pattern === obj
    end
    true
  end
end
1355
# Shortcut for Patterns_all.new - builds a pattern that matches
# only when every one of the given patterns matches.
def all(*patterns)
  Patterns_all.new(*patterns)
end
1359
# Perl 6 is supposed to have native support for something like that.
# Constructor takes multiple patterns. The object matches if at least
# one of them matches.
#
# Usage:
#   case foo
#   when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
#     print foo
#   end
class Patterns_any
  def initialize(*patterns)
    @patterns = patterns
  end

  # True as soon as some pattern === obj; patterns are tried in order
  # and checking stops at the first success.
  # With no patterns at all, nothing matches.
  def ===(obj)
    @patterns.each do |pattern|
      return true if pattern === obj
    end
    false
  end
end
1376
# Shortcut for Patterns_any.new - builds a pattern that matches
# when at least one of the given patterns matches.
def any(*patterns)
  Patterns_any.new(*patterns)
end
Generated using the rcov code coverage analysis tool for Ruby version 0.8.0.