C0 code coverage information


Code reported as executed by Ruby looks like this...
and this: this line is also marked as covered.
Lines considered as run by rcov, but not reported by Ruby, look like this,
and this: these lines were inferred by rcov (using simple heuristics).
Finally, here's a line marked as not executed.
Name Total lines Lines of code Total coverage Code coverage
magic_xml.rb 1379 964
91.1% 
92.9% 
   1 # Needed for parsing
   2 require 'rexml/parsers/baseparser'
   3 # Needed for fetching XMLs from the Internet
   4 require 'uri'
   5 require 'net/http'
   6 
   7 # FIXME: Make comment formatting RDoc-friendly. It's not always so now.
   8 
   9 # In Ruby 2 Symbol will be a subclass of String, and
  10 # this won't be needed any more. Before then...
  11 class Symbol
  12     include Comparable
  13     def <=>(other)
  14         raise ArgumentError.new("comparison of #{self.class} with #{other.class} failed") unless other.is_a? Symbol
  15         to_s <=> other.to_s
  16     end
  17     
  18     alias_method :eqeqeq_before_magic_xml, :===
  19     def ===(*args, &blk)
  20         if args.size >= 1 and args[0].is_a? XML
  21             self == args[0].name
  22         else
  23             eqeqeq_before_magic_xml(*args, &blk)
  24         end
  25     end
  26 end
  27 
  28 class Hash
  29     alias_method :eqeqeq_before_magic_xml, :===
  30     def ===(*args, &blk)
  31         if args.size >= 1 and args[0].is_a? XML
  32             all?{|k,v| v === args[0][k]}
  33         else
  34             eqeqeq_before_magic_xml(*args, &blk)
  35         end
  36     end
  37 end
  38 
  39 class String
  40     # Escape string for output as XML text (< > &)
  41     def xml_escape
  42         replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;" }
  43         gsub(/([<>&])/) { replacements[$1] }
  44     end
  45     # Escape characters for output as XML attribute values (< > & ' ")
  46     def xml_attr_escape
  47         replacements = {"<" => "&lt;", ">" => "&gt;", "&" => "&amp;", "\"" => "&quot;", "'" => "&apos;"}
  48         gsub(/([<>&\'\"])/) { replacements[$1] }
  49     end
  50     # Unescape entities
  51     # Supports:
  52     # * Full set of HTML-compatible named entities
  53     # * Decimal entities &#1234;
  54     # * Hex entities &#xA0b1;
  55     def xml_unescape(extra_entities=nil)
  56         @@xhtml_entity_replacements ||= {
  57             'nbsp' => 160,
  58             'iexcl' => 161,
  59             'cent' => 162,
  60             'pound' => 163,
  61             'curren' => 164,
  62             'yen' => 165,
  63             'brvbar' => 166,
  64             'sect' => 167,
  65             'uml' => 168,
  66             'copy' => 169,
  67             'ordf' => 170,
  68             'laquo' => 171,
  69             'not' => 172,
  70             'shy' => 173,
  71             'reg' => 174,
  72             'macr' => 175,
  73             'deg' => 176,
  74             'plusmn' => 177,
  75             'sup2' => 178,
  76             'sup3' => 179,
  77             'acute' => 180,
  78             'micro' => 181,
  79             'para' => 182,
  80             'middot' => 183,
  81             'cedil' => 184,
  82             'sup1' => 185,
  83             'ordm' => 186,
  84             'raquo' => 187,
  85             'frac14' => 188,
  86             'frac12' => 189,
  87             'frac34' => 190,
  88             'iquest' => 191,
  89             'Agrave' => 192,
  90             'Aacute' => 193,
  91             'Acirc' => 194,
  92             'Atilde' => 195,
  93             'Auml' => 196,
  94             'Aring' => 197,
  95             'AElig' => 198,
  96             'Ccedil' => 199,
  97             'Egrave' => 200,
  98             'Eacute' => 201,
  99             'Ecirc' => 202,
 100             'Euml' => 203,
 101             'Igrave' => 204,
 102             'Iacute' => 205,
 103             'Icirc' => 206,
 104             'Iuml' => 207,
 105             'ETH' => 208,
 106             'Ntilde' => 209,
 107             'Ograve' => 210,
 108             'Oacute' => 211,
 109             'Ocirc' => 212,
 110             'Otilde' => 213,
 111             'Ouml' => 214,
 112             'times' => 215,
 113             'Oslash' => 216,
 114             'Ugrave' => 217,
 115             'Uacute' => 218,
 116             'Ucirc' => 219,
 117             'Uuml' => 220,
 118             'Yacute' => 221,
 119             'THORN' => 222,
 120             'szlig' => 223,
 121             'agrave' => 224,
 122             'aacute' => 225,
 123             'acirc' => 226,
 124             'atilde' => 227,
 125             'auml' => 228,
 126             'aring' => 229,
 127             'aelig' => 230,
 128             'ccedil' => 231,
 129             'egrave' => 232,
 130             'eacute' => 233,
 131             'ecirc' => 234,
 132             'euml' => 235,
 133             'igrave' => 236,
 134             'iacute' => 237,
 135             'icirc' => 238,
 136             'iuml' => 239,
 137             'eth' => 240,
 138             'ntilde' => 241,
 139             'ograve' => 242,
 140             'oacute' => 243,
 141             'ocirc' => 244,
 142             'otilde' => 245,
 143             'ouml' => 246,
 144             'divide' => 247,
 145             'oslash' => 248,
 146             'ugrave' => 249,
 147             'uacute' => 250,
 148             'ucirc' => 251,
 149             'uuml' => 252,
 150             'yacute' => 253,
 151             'thorn' => 254,
 152             'yuml' => 255,
 153             'quot' => 34,
 154             'apos' => 39, # Wasn't present in the HTML entities set, but is defined in XML standard
 155             'amp' => 38,
 156             'lt' => 60,
 157             'gt' => 62,
 158             'OElig' => 338,
 159             'oelig' => 339,
 160             'Scaron' => 352,
 161             'scaron' => 353,
 162             'Yuml' => 376,
 163             'circ' => 710,
 164             'tilde' => 732,
 165             'ensp' => 8194,
 166             'emsp' => 8195,
 167             'thinsp' => 8201,
 168             'zwnj' => 8204,
 169             'zwj' => 8205,
 170             'lrm' => 8206,
 171             'rlm' => 8207,
 172             'ndash' => 8211,
 173             'mdash' => 8212,
 174             'lsquo' => 8216,
 175             'rsquo' => 8217,
 176             'sbquo' => 8218,
 177             'ldquo' => 8220,
 178             'rdquo' => 8221,
 179             'bdquo' => 8222,
 180             'dagger' => 8224,
 181             'Dagger' => 8225,
 182             'permil' => 8240,
 183             'lsaquo' => 8249,
 184             'rsaquo' => 8250,
 185             'euro' => 8364,
 186             'fnof' => 402,
 187             'Alpha' => 913,
 188             'Beta' => 914,
 189             'Gamma' => 915,
 190             'Delta' => 916,
 191             'Epsilon' => 917,
 192             'Zeta' => 918,
 193             'Eta' => 919,
 194             'Theta' => 920,
 195             'Iota' => 921,
 196             'Kappa' => 922,
 197             'Lambda' => 923,
 198             'Mu' => 924,
 199             'Nu' => 925,
 200             'Xi' => 926,
 201             'Omicron' => 927,
 202             'Pi' => 928,
 203             'Rho' => 929,
 204             'Sigma' => 931,
 205             'Tau' => 932,
 206             'Upsilon' => 933,
 207             'Phi' => 934,
 208             'Chi' => 935,
 209             'Psi' => 936,
 210             'Omega' => 937,
 211             'alpha' => 945,
 212             'beta' => 946,
 213             'gamma' => 947,
 214             'delta' => 948,
 215             'epsilon' => 949,
 216             'zeta' => 950,
 217             'eta' => 951,
 218             'theta' => 952,
 219             'iota' => 953,
 220             'kappa' => 954,
 221             'lambda' => 955,
 222             'mu' => 956,
 223             'nu' => 957,
 224             'xi' => 958,
 225             'omicron' => 959,
 226             'pi' => 960,
 227             'rho' => 961,
 228             'sigmaf' => 962,
 229             'sigma' => 963,
 230             'tau' => 964,
 231             'upsilon' => 965,
 232             'phi' => 966,
 233             'chi' => 967,
 234             'psi' => 968,
 235             'omega' => 969,
 236             'thetasym' => 977,
 237             'upsih' => 978,
 238             'piv' => 982,
 239             'bull' => 8226,
 240             'hellip' => 8230,
 241             'prime' => 8242,
 242             'Prime' => 8243,
 243             'oline' => 8254,
 244             'frasl' => 8260,
 245             'weierp' => 8472,
 246             'image' => 8465,
 247             'real' => 8476,
 248             'trade' => 8482,
 249             'alefsym' => 8501,
 250             'larr' => 8592,
 251             'uarr' => 8593,
 252             'rarr' => 8594,
 253             'darr' => 8595,
 254             'harr' => 8596,
 255             'crarr' => 8629,
 256             'lArr' => 8656,
 257             'uArr' => 8657,
 258             'rArr' => 8658,
 259             'dArr' => 8659,
 260             'hArr' => 8660,
 261             'forall' => 8704,
 262             'part' => 8706,
 263             'exist' => 8707,
 264             'empty' => 8709,
 265             'nabla' => 8711,
 266             'isin' => 8712,
 267             'notin' => 8713,
 268             'ni' => 8715,
 269             'prod' => 8719,
 270             'sum' => 8721,
 271             'minus' => 8722,
 272             'lowast' => 8727,
 273             'radic' => 8730,
 274             'prop' => 8733,
 275             'infin' => 8734,
 276             'ang' => 8736,
 277             'and' => 8743,
 278             'or' => 8744,
 279             'cap' => 8745,
 280             'cup' => 8746,
 281             'int' => 8747,
 282             'there4' => 8756,
 283             'sim' => 8764,
 284             'cong' => 8773,
 285             'asymp' => 8776,
 286             'ne' => 8800,
 287             'equiv' => 8801,
 288             'le' => 8804,
 289             'ge' => 8805,
 290             'sub' => 8834,
 291             'sup' => 8835,
 292             'nsub' => 8836,
 293             'sube' => 8838,
 294             'supe' => 8839,
 295             'oplus' => 8853,
 296             'otimes' => 8855,
 297             'perp' => 8869,
 298             'sdot' => 8901,
 299             'lceil' => 8968,
 300             'rceil' => 8969,
 301             'lfloor' => 8970,
 302             'rfloor' => 8971,
 303             'lang' => 9001,
 304             'rang' => 9002,
 305             'loz' => 9674,
 306             'spades' => 9824,
 307             'clubs' => 9827,
 308             'hearts' => 9829,
 309             'diams' => 9830,
 310         }
 311         gsub(/&(?:([a-zA-Z]+)|#([0-9]+)|#x([a-fA-F0-9]+));/) {
 312             if $1 then
 313                 v = @@xhtml_entity_replacements[$1]
 314                 # Nonstandard entity
 315                 unless v
 316                     if extra_entities.is_a? Proc
 317                         v = extra_entities.call($1)
 318                     # Well, we expect a Hash here, but any container will do.
 319                     # As long as it's not a nil.
 320                     elsif extra_entities
 321                         v = extra_entities[$1]
 322                     end
 323                 end
 324                 raise "Unknown escape #{$1}" unless v
 325             elsif $2
 326                 v = $2.to_i
 327             else
 328                 v = $3.hex
 329             end
 330             # v can be a String or an Integer
 331             if v.is_a? String then v else [v].pack('U') end
 332         }
 333     end
 334     def xml_parse
 335         XML.parse(self)
 336     end
 337 end
 338 
 339 class File
 340     def xml_parse
 341         XML.parse(self)
 342     end
 343 end
 344 
 345 class Array
 346     # children of any element
 347     def children(*args, &blk)
 348         res = []
 349         each{|c|
 350             res += c.children(*args, &blk) if c.is_a? XML
 351         }
 352         res
 353     end
 354     # descendants of any element
 355     def descendants(*args, &blk)
 356         res = []
 357         each{|c|
 358             res += c.descendants(*args, &blk) if c.is_a? XML
 359         }
 360         res
 361     end
 362 end
 363 
 364 # Methods of Enumerable.
 365 # It is not easy to design good methods, because XML
 366 # is not really "a container", it just acts as one sometimes.
 367 # Generally:
 368 # * Methods that return nil should work
 369 # * Methods that return an element should work
 370 # * Methods that return a container should return XML container, not Array
 371 # * Conversion methods should convert
 372 #
 373 # FIXME: Many methods use .dup, but do we want a shallow or a deep copy ?
 374 class XML
 375     include Enumerable
 376     # Default any? is ok
 377     # Default all? is ok
 378 
 379     # Iterate over children, possibly with a selector
 380     def each(*selector, &blk)
 381         children(*selector, &blk)
 382         self
 383     end
 384 
 385     # Sort XML children of XML element.
 386     def sort_by(*args, &blk)
 387         self.dup{ @contents = @contents.select{|c| c.is_a? XML}.sort_by(*args, &blk) }
 388     end
 389 
 390     # Sort children of XML element.
 391     def children_sort_by(*args, &blk)
 392         self.dup{ @contents = @contents.sort_by(*args, &blk) }
 393     end
 394 
 395     # Sort children of XML element.
 396     #
 397     # Using sort is highly wrong, as XML (and XML-extras) is not even Comparable.
 398     # Use sort_by instead.
 399     #
 400     # Unless you define your own XML#<=> operator, or do something equally weird.
 401     def sort(*args, &blk)
 402         self.dup{ @contents = @contents.sort(*args, &blk) }
 403     end
 404 
 405     #collect/map
 406     #detect/find
 407     #each_cons
 408     #each_slice
 409     #each_with_index
 410     #to_a
 411     #entries
 412     #enum_cons
 413     #enum_slice
 414     #enum
 415     # grep
 416     # include?/member?
 417     # inject
 418     # max/min
 419     # max_by/min_by - Ruby 1.9
 420     # partition
 421     # reject
 422     # sort
 423     # sort_by
 424     # to_set
 425     # zip
 426     # And Enumerable::Enumerator-generating methods
 427 end
 428 
 429 # Class methods
 430 class XML
 431     # XML.foo! == xml!(:foo)
 432     # XML.foo  == xml(:foo)
 433     def self.method_missing(meth, *args, &blk) 
 434         if meth.to_s =~ /^(.*)!$/
 435             xml!($1.to_sym, *args, &blk)
 436         else
 437             XML.new(meth, *args, &blk)
 438         end
 439     end
 440 
 441     # Read file and parse
 442     def self.from_file(file)
 443         file = File.open(file) if file.is_a? String
 444         parse(file)
 445     end
 446 
 447     # Fetch URL and parse
 448     # Supported:
 449     # http://.../
 450     # https://.../
 451     # file:foo.xml
 452     # string:<foo/>
 453     def self.from_url(url)
 454         if url =~ /^string:(.*)$/m
 455             parse($1)
 456         elsif url =~ /^file:(.*)$/m
 457             from_file($1)
 458         elsif url =~ /^http(s?):/
 459             ssl = ($1 == "s")
 460             # No, seriously - Ruby needs something better than net/http
 461             # Something that groks basic auth and queries and redirects automatically:
 462             # HTTP_LIBRARY.get_content("http://username:passwd/u.r.l/?query")
 463             # URI parsing must go inside the library, client programs
 464             # should have nothing to do with it
 465 
 466             # net/http is really inconvenient to use here
 467             u = URI.parse(url)
 468             # You're not seeing this:
 469             if u.query then
 470                 path = u.path + "?" + u.query
 471             else
 472                 path = u.path
 473             end
 474             req = Net::HTTP::Get.new(path)
 475             if u.userinfo
 476                 username, passwd = u.userinfo.split(/:/,2)
 477                 req.basic_auth username, passwd
 478             end
 479             if ssl
 480                 # NOTE: You need libopenssl-ruby installed
 481                 # if you want to use HTTPS. Ubuntu is broken
 482                 # as it doesn't provide it in the default packages.
 483                 require 'net/https'
 484                 http = Net::HTTP.new(u.host, u.port)
 485                 http.use_ssl = true
 486                 http.verify_mode = OpenSSL::SSL::VERIFY_NONE
 487             else
 488                 http = Net::HTTP.new(u.host, u.port)
 489             end
 490             
 491             res = http.start {|http| http.request(req) }
 492             # TODO: Throw a more meaningful exception
 493             parse(res.body)
 494         else
 495             raise "URL protocol #{url} not supported (http, https, file, string are supported)"
 496         end
 497     end
 498 
 499     # Like CDuce load_xml
 500     # The path can be:
 501     # * file handler
 502     # * URL (a string with :)
 503     # * file name (a string without :)
 504     def self.load(obj)
 505         if obj.is_a? String
 506             if obj.include? ":"
 507                 from_url(obj)
 508             else
 509                 from_file(obj)
 510             end
 511         else
 512             parse(obj)
 513         end
 514     end
 515 
 516     # Parse XML in mixed stream/tree mode
 517     # Basically the idea is that every time we get start element,
 518     # we ask the block what to do about it.
 519     # If it wants a tree below it, it should call e.tree
 520     # If a tree was requested, elements below the current one
 521     # are *not* processed. If it wasn't, they are.
 522     #
 523     # For example:
 524     #  <foo><bar/></foo><foo2/>
 525     #  yield <foo> ... </foo>
 526     #  .complete! called
 527     #  process <foo2> next
 528     #
 529     # But:
 530     #  <foo><bar/></foo><foo2/>
 531     #  yield <foo> ... </foo>
 532     #  .complete! not called
 533     #  process <bar> next
 534     #
 535     # FIXME: yielded values are not reusable for now
 536     # FIXME: make more object-oriented
     # Pull events from a REXML BaseParser over +stream+ and yield a freshly
     # built XML node (name and unescaped attributes, no children yet) for
     # every start tag. A block is required, since the node is passed to yield.
     # Returns nil when :end_document is reached.
 537     def self.parse_as_twigs(stream)
 538         parser = REXML::Parsers::BaseParser.new stream
 539         # We don't really need to keep the stack ;-)
 540         stack = []
 541         while true
 542             event = parser.pull
 543             case event[0]
 544             when :start_element
 545                 # Now the evil part
 546                 attrs = {}
 547                 event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
 548                 node = XML.new(event[1].to_sym, attrs, *event[3..-1])
 549                 
 550                 # I can't say it's superelegant
                     # Give this one node a complete! method: it runs the stored
                     # proc at most once and then clears it.
 551                 class <<node
 552                     attr_accessor :do_complete
 553                     def complete!
 554                         if @do_complete
 555                             @do_complete.call
 556                             @do_complete = nil
 557                         end
 558                     end
 559                 end
                     # The proc reads the rest of this element from the same
                     # parser, so those events are consumed and never reach the
                     # loop below.
 560                 node.do_complete = proc{
 561                     parse_subtree(node, parser)
 562                 }
 563 
 564                 yield(node)
                     # Proc still set means the block did not call complete!:
                     # remember the open element and keep streaming its children.
 565                 if node.do_complete
 566                     stack.push node
 567                     node.do_complete = nil # It's too late, complete! shouldn't do anything now
 568                 end
 569             when :end_element
 570                 stack.pop
 571             when :end_document
 572                 return
 573             else
 574                 # FIXME: Do the right thing.
 575                 # For now, ignore *everything* else
 576                 # This is totally incorrect, user might want to 
 577                 # see text, comments and stuff like that anyway
 578             end
 579         end
 580     end
 581     
 582     # Basically it's a copy of self.parse, ugly ...
 583     def self.parse_subtree(start_node, parser)
 584         stack = [start_node]
 585         res = nil
 586         while true
 587             event = parser.pull
 588             case event[0]
 589             when :start_element
 590                 attrs = {}
 591                 event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape}
 592                 stack << XML.new(event[1].to_sym, attrs, *event[3..-1])
 593                 if stack.size == 1
 594                     res = stack[0] 
 595                 else
 596                     stack[-2] << stack[-1]
 597                 end
 598             when :end_element
 599                 stack.pop
 600                 return if stack == []
 601             # Needs unescaping
 602             when :text
 603                  # Ignore whitespace
 604                  if stack.size == 0
 605                      next if event[1] !~ /\S/
 606                      raise "Non-whitespace text out of document root"
 607                  end
 608                  stack[-1] << event[1].xml_unescape
 609             # CDATA is already unescaped
 610             when :cdata
 611                  if stack.size == 0
 612                      raise "CDATA out of the document root"
 613                  end
 614                  stack[-1] << event[1]
 615             when :end_document
 616                 raise "Parse error: end_document inside a subtree, tags are not balanced"
 617             when :xmldecl,:start_doctype,:end_doctype,:elementdecl,:processing_instruction
 618                 # Positivery ignore
 619             when :comment,:externalentity,:entity,:attlistdecl,:notationdecl
 620                 # Ignore ???
 621                 #print "Ignored XML event #{event[0]} when parsing\n"
 622             else
 623                 # Huh ? What's that ?
 624                 #print "Unknown XML event #{event[0]} when parsing\n"
 625             end
 626         end
 627         res
 628 
 629     end
 630 
 631     # Parse XML using REXML. Available options:
 632     # * :extra_entities => Proc or Hash (default = nil)
 633     # * :remove_pretty_printing => true/false (default = false)
 634     # * :comments => true/false (default = false)
 635     # * :pi => true/false (default = false)
 636     # * :normalize => true/false (default = false) - normalize
 637     # * :multiple_roots => true/false (default=false) - document
 638     #      can have any number of roots (instead of one).
 639     #      Return all in an array instead of root/nil.
 640     #      Also include non-elements (String/PI/Comment) in the return set !!!
 641     #
 642     # FIXME: :comments/:pi will break everything
 643     # if there are comments/PIs outside document root.
 644     # Now PIs are outside the document root more often than not,
 645     # so we're pretty much screwed here.
 646     #
 647     # FIXME: Integrate all kinds of parse, and make them support extra options
 648     #
 649     # FIXME: Benchmark normalize!
 650     #
 651     # FIXME: Benchmark dup-based Enumerable methods
 652     #
 653     # FIXME: Make it possible to include bogus XML_Document superparent,
 654     #        and to make it support out-of-root PIs/Comments
 655     def self.parse(stream, options={})
 656         extra_entities = options[:extra_entities]
 657 
 658         parser = REXML::Parsers::BaseParser.new stream
 659         stack = [[]]
 660         
 661         while true
 662             event = parser.pull
 663             case event[0]
 664             when :start_element
 665                 attrs = {}
 666                 event[2].each{|k,v| attrs[k.to_sym] = v.xml_unescape(extra_entities) }
 667                 stack << XML.new(event[1].to_sym, attrs, event[3..-1])
 668                 stack[-2] << stack[-1]
 669             when :end_element
 670                 stack.pop
 671             # Needs unescaping
 672             when :text
 673                  e = event[1].xml_unescape(extra_entities)
 674                  # Either inside root or in multi-root mode
 675                  if stack.size > 1 or options[:multiple_roots]
 676                      stack[-1] << e
 677                  elsif event[1] !~ /\S/
 678                      # Ignore out-of-root whitespace in single-root mode
 679                  else
 680                      raise "Non-whitespace text out of document root (and not in multiroot mode): #{event[1]}"
 681                  end
 682             # CDATA is already unescaped
 683             when :cdata
 684                 e = event[1]
 685                 if stack.size > 1 or options[:multiple_roots]
 686                     stack[-1] << e
 687                 else
 688                     raise "CDATA out of the document root"
 689                 end
 690             when :comment
 691                 next unless options[:comments]
 692                 e = XML_Comment.new(event[1])
 693                 if stack.size > 1 or options[:multiple_roots]
 694                     stack[-1] << e
 695                 else
 696                     # FIXME: Ugly !
 697                     raise "Comments out of the document root"
 698                 end
 699             when :processing_instruction
 700                 # FIXME: Real PI node
 701                 next unless options[:pi]
 702                 e = XML_PI.new(event[1], event[2])
 703                 if stack.size > 1 or options[:multiple_roots]
 704                     stack[-1] << e
 705                 else
 706                     # FIXME: Ugly !
 707                     raise "Processing instruction out of the document root"
 708                 end
 709             when :end_document
 710                 break
 711             when :xmldecl,:start_doctype,:end_doctype,:elementdecl
 712                 # Positivery ignore
 713             when :externalentity,:entity,:attlistdecl,:notationdecl
 714                 # Ignore ???
 715                 #print "Ignored XML event #{event[0]} when parsing\n"
 716             else
 717                 # Huh ? What's that ?
 718                 #print "Unknown XML event #{event[0]} when parsing\n"
 719             end
 720         end
 721         roots = stack[0]
 722         
 723         roots.each{|root| root.remove_pretty_printing!} if options[:remove_pretty_printing]
 724         # :remove_pretty_printing does :normalize anyway
 725         roots.each{|root| root.normalize!} if options[:normalize]
 726         if options[:multiple_roots]
 727             roots
 728         else
 729             roots[0]
 730         end
 731     end
 732 
 733     # Parse a sequence. Equivalent to XML.parse(stream, :multiple_roots => true).
 734     def self.parse_sequence(stream, options={})
 735         o = options.dup
 736         o[:multiple_roots] = true
 737         parse(stream, o)
 738     end
 739 
 740     # Renormalize a string containing XML document
 741     def self.renormalize(stream)
 742         parse(stream).to_s
 743     end
 744 
 745     # Renormalize a string containing a sequence of XML documents
 746     # and strings
 747     # XML.renormalize_sequence("<hello   />, <world></world>!") =>
 748     # "<hello/>, <world/>!"
 749     def self.renormalize_sequence(stream)
 750         parse_sequence(stream).to_s
 751     end
 752 end
 753 
 754 # Instance methods (other than those of Enumerable)
 755 class XML
 756     attr_accessor :name, :attrs, :contents
 757 
 758     # initialize can be run in many ways
 759     # * XML.new
 760     # * XML.new(:tag_symbol)
 761     # * XML.new(:tag_symbol, {attributes})
 762     # * XML.new(:tag_symbol, "children", "more", XML.new(...))
 763     # * XML.new(:tag_symbol, {attributes}, "and", "children")
 764     # * XML.new(:tag_symbol) { monadic code }
 765     # * XML.new(:tag_symbol, {attributes}) { monadic code }
 766     #
 767     # Or even:
 768     # * XML.new(:tag_symbol, "children") { and some monadic code }
 769     # * XML.new(:tag_symbol, {attributes}, "children") { and some monadic code }
 770     # But typically you won't be mixing these two style
 771     #
 772     # Attribute values will be converted to strings
 773     def initialize(*args, &blk)
 774         @name     = nil
 775         @attrs    = {}
 776         @contents = []
 777         @name = args.shift if args.size != 0
 778         if args.size != 0 and args[0].is_a? Hash
 779             args.shift.each{|k,v|
 780                 # Do automatic conversion here
 781                 # This also assures that the hashes are *not* shared
 782                 self[k] = v
 783             }
 784         end
 785         # Expand Arrays passed as arguments
 786         self << args
 787         # FIXME: We'd rather not have people say @name = :foo there :-)
 788         if blk
 789             instance_eval(&blk)
 790         end
 791     end
 792 
 793     # Convert to a well-formatted XML
 794     def to_s
 795         "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
 796         if @contents.size == 0
 797             "/>"
 798         else
 799             ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.to_s end}.join + "</#{name}>"
 800         end
 801     end
 802 
 803     # Convert to a well-formatted XML, but without children information.
 804     # This is a reasonable format for irb and debugging.
 805     # If you want to see a few levels of children, call inspect(2) and so on
 806     def inspect(include_children=0)
 807         "<#{@name}" + @attrs.sort.map{|k,v| " #{k}='#{v.xml_attr_escape}'"}.join +
 808         if @contents.size == 0
 809             "/>"
 810         elsif include_children == 0
 811             ">...</#{name}>"
 812         else
 813             ">" + @contents.map{|x| if x.is_a? String then x.xml_escape else x.inspect(include_children-1) end}.join + "</#{name}>"
 814         end
 815     end
 816 
 817     # Read attributes.
 818     # Also works with pseudoattributes:
 819     #  img[:@x] == img.child(:x).text # or nil if there isn't any.
 820     def [](key)
 821         if key.to_s[0] == ?@
 822             tag = key.to_s[1..-1].to_sym
 823             c = child(tag)
 824             if c
 825                 c.text
 826             else
 827                 nil
 828             end
 829         else
 830             @attrs[key]
 831         end
 832     end
 833 
 834     # Set attributes.
 835     # Value is automatically converted to String, so you can say:
 836     #  img[:x] = 200
 837     # Also works with pseudoattributes:
 838     #  foo[:@bar] = "x"
 839     def []=(key, value)
 840         if key.to_s[0] == ?@
 841             tag = key.to_s[1..-1].to_sym
 842             c = child(tag)
 843             if c
 844                 c.contents = [value.to_s]
 845             else
 846                 self << XML.new(tag, value.to_s)
 847             end
 848         else
 849             @attrs[key] = value.to_s
 850         end
 851     end
 852 
 853     # Add children.
 854     # Possible uses:
 855     # * Add single element
 856     #  self << xml(...)
 857     #  self << "foo"
 858     # Add nothing:
 859     #  self << nil  
 860     # Add multiple elements (also works recursively):
 861     #  self << [a, b, c] 
 862     #  self << [a, [b, c], d] 
 863     def <<(cnt)
 864         if cnt.nil?
 865             # skip
 866         elsif cnt.is_a? Array
 867             cnt.each{|elem| self << elem}
 868         else
 869             @contents << cnt
 870         end
 871         self
 872     end
 873 
 874     # Equality test, works as if XMLs were normalized, so:
 875     #  XML.new(:foo, "Hello, ", "world") == XML.new(:foo, "Hello, world")
 876     def ==(x)
 877         return false unless x.is_a? XML
 878         return false unless name == x.name and attrs == x.attrs
 879         # Now the hard part, strings can be split in different ways
 880         # empty string children are possible etc.
 881         self_i = 0
 882         othr_i = 0
 883         while self_i != contents.size or othr_i != x.contents.size
 884             # Ignore ""s
 885             if contents[self_i].is_a? String and contents[self_i] == ""
 886                 self_i += 1
 887                 next
 888             end
 889             if x.contents[othr_i].is_a? String and x.contents[othr_i] == ""
 890                 othr_i += 1
 891                 next
 892             end
 893 
 894             # If one is finished and the other contains non-empty elements,
 895             # they are not equal
 896             return false if self_i == contents.size or othr_i == x.contents.size
 897 
 898             # Are they both Strings ?
 899             # Strings can be divided in different ways, and calling normalize!
 900             # here would be rather expensive, so let's use this complicated
 901             # algorithm
 902             if contents[self_i].is_a? String and x.contents[othr_i].is_a? String
 903                 a = contents[self_i]
 904                 b = x.contents[othr_i]
 905                 self_i += 1
 906                 othr_i += 1
 907                 while a != "" or b != ""
 908                     if a == b
 909                         a = ""
 910                         b = ""
 911                     elsif a.size > b.size and a[0, b.size] == b
 912                         a = a[b.size..-1]
 913                         if x.contents[othr_i].is_a? String
 914                             b = x.contents[othr_i]
 915                             othr_i += 1
 916                             next
 917                         end
 918                     elsif b.size > a.size and b[0, a.size] == a
 919                         b = b[a.size..-1]
 920                         if contents[self_i].is_a? String
 921                             a = contents[self_i]
 922                             self_i += 1
 923                             next
 924                         end
 925                     else
 926                         return false
 927                     end
 928                 end
 929                 next
 930             end
 931 
 932             # OK, so at least one of them is not a String.
 933             # Hopefully they're either both XMLs or one is an XML and the
 934             # other is a String. It is also possible that contents contains
 935             # something illegal, but we aren't catching that,
 936             # so xml(:foo, Garbage.new) is going to at least equal itself.
 937             # And we aren't, because xml(:foo, Garbage.new) == xml(:bar, Garbage.new)
 938             # is going to return an honest false, and incoherent sanity
 939             # check is worse than no sanity check.
 940             #
 941             # Oh yeah, they can be XML_PI or XML_Comment. In such case, this
 942             # is ok.
 943             return false unless contents[self_i] == x.contents[othr_i]
 944             self_i += 1
 945             othr_i += 1
 946         end
 947         return true
 948     end
 949 
 950     alias_method :real_method_missing, :method_missing
 951     # Define all foo!-methods for monadic interface, so you can write:
 952     # 
 953     def method_missing(meth, *args, &blk) 
 954         if meth.to_s =~ /^(.*)!$/
 955             self << XML.new($1.to_sym, *args, &blk)
 956         else
 957             real_method_missing(meth, *args, &blk)
 958         end
 959     end
 960 
 961     # Make monadic interface more "official"
 962     # * node.exec! { foo!; bar! }
 963     # is equivalent to
 964     # * node << xml(:foo) << xml(:bar)
 965     def exec!(&blk)
 966         instance_eval(&blk)
 967     end
 968 
 969     # Select a subtree
 970     # NOTE: Uses object_id of the start/end tags !
 971     # They have to be the same, not just identical !
 972     # <foo>0<a>1</a><b/><c/><d>2</d><e/>3</foo>.range(<a>1</a>, <d>2</d>)
 973     # returns
 974     # <foo><b/><c/></foo>
 975     # start and end and their descendants are not included in
 976     # the result tree.
 977     # Either start or end can be nil.
 978     # * If both start and end are nil, return whole tree.
 979     # * If start is nil, return subtree up to range_end.
 980     # * If start is not inside the tree, return nil.
 981     # * If end is nil, return subtree from start
 982     # * If end is not inside the tree, return subtree from start.
 983     # * If end is before or below start, or they're the same node, the result is unspecified.
 984     # * if end comes directly after start, or as first node when start==nil, return path reaching there.
 985     def range(range_start, range_end, end_reached_cb=nil)
 986         if range_start == nil
 987             result = XML.new(name, attrs)
 988         else
 989             result = nil
 990         end
 991         @contents.each {|c|
 992             # end reached !
 993             if range_end and c.object_id == range_end.object_id
 994                 end_reached_cb.call if end_reached_cb
 995                 break
 996             end
 997             # start reached !
 998             if range_start and c.object_id == range_start.object_id
 999                 result = XML.new(name, attrs)
1000                 next
1001             end
1002             if result # We already started
1003                 if c.is_a? XML
1004                     break_me = false
1005                     result.add! c.range(nil, range_end, lambda{ break_me = true })
1006                     if break_me
1007                         end_reached_cb.call if end_reached_cb
1008                         break
1009                     end
1010                 else # String/XML_PI/XML_Comment
1011                     result.add! c
1012                 end
1013             else
1014                 # Strings/XML_PI/XML_Comment obviously cannot start a range
1015                 if c.is_a? XML
1016                     break_me = false
1017                     r = c.range(range_start, range_end, lambda{ break_me = true })
1018                     if r
1019                         # start reached !
1020                         result = XML.new(name, attrs, r)
1021                     end
1022                     if break_me
1023                         # end reached !
1024                         end_reached_cb.call if end_reached_cb
1025                         break
1026                     end
1027                 end
1028             end
1029         }
1030         return result
1031     end
1032 
1033     # XML#subsequence is similar to XML#range, but instead of
 1034     # trimmed subtree it returns a list of elements
1035     # The same elements are included in both cases, but here
1036     # we do not include any parents !
1037     #
1038     # <foo><a/><b/><c/></foo>.range(a,c) => <foo><b/></foo>
1039     # <foo><a/><b/><c/></foo>.subsequence(a,c) => <b/>
1040     #
1041     # <foo><a><a1/></a><b/><c/></foo>.range(a1,c) => <foo><a/><b/></foo> # Does <a/> make sense ?
1042     # <foo><a><a1/></a><b/><c/></foo>.subsequence(a1,c) => <b/>
1043     #
1044     # <foo><a><a1/><a2/></a><b/><c/></foo>.range(a1,c) => <foo><a><a2/></a><b/></foo>
1045     # <foo><a><a1/><a2/></a><b/><c/></foo>.subsequence(a1,c) => <a2/><b/>
1046     #
1047     # And we return [], not nil if nothing matches
1048     def subsequence(range_start, range_end, start_seen_cb=nil, end_seen_cb=nil)
1049         result = []
1050         start_seen = range_start.nil?
1051         @contents.each{|c|
1052             if range_end and range_end.object_id == c.object_id
1053                 end_seen_cb.call if end_seen_cb
1054                 break 
1055             end
1056             if range_start and range_start.object_id == c.object_id
1057                 start_seen = true
1058                 start_seen_cb.call if start_seen_cb
1059                 next
1060             end
1061             if start_seen
1062                 if c.is_a? XML
1063                     break_me = false
1064                     result += c.subsequence(nil, range_end, nil, lambda{break_me=true})
1065                     break if break_me
1066                 else # String/XML_PI/XML_Comment
1067                     result << c
1068                 end
1069             else
1070                 # String/XML_PI/XML_Comment cannot start a subsequence
1071                 if c.is_a? XML
1072                     break_me = false
1073                     result += c.subsequence(range_start, range_end, lambda{start_seen=true}, lambda{break_me=true})
1074                     break if break_me
1075                 end
1076             end
1077         }
1078         # Include starting tag if it was right from the range_start
1079         # Otherwise, return just the raw sequence
1080         result = [XML.new(@name, @attrs, result)] if range_start == nil
1081         return result
1082     end
1083 
1084     # =~ for a few reasonable patterns
1085     def =~(pattern)
1086         if pattern.is_a? Symbol
1087             @name == pattern
1088         elsif pattern.is_a? Regexp
1089             rv = text =~ pattern
1090         else # Hash, Pattern_any, Pattern_all
1091             pattern === self
1092         end
1093     end
1094     
1095     # Get rid of pretty-printing whitespace. Also normalizes the XML.
1096     def remove_pretty_printing!(exceptions=nil)
1097         normalize!
1098         real_remove_pretty_printing!(exceptions)
1099         normalize!
1100     end
1101 
1102     # normalize! is already recursive, so only one call at top level is needed.
1103     # This helper method lets us avoid extra calls to normalize!.
1104     def real_remove_pretty_printing!(exceptions=nil)
1105         return if exceptions and exceptions.include? @name
1106         each{|c|
1107             if c.is_a? String
1108                 c.sub!(/^\s+/, "")
1109                 c.sub!(/\s+$/, "")
1110                 c.gsub!(/\s+/, " ")
1111             elsif c.is_a? XML_PI or c.is_a? XML_Comment
1112             else
1113                 c.real_remove_pretty_printing!(exceptions)
1114             end
1115         }
1116     end
1117 
1118     protected :real_remove_pretty_printing!
1119 
1120     # Add pretty-printing whitespace. Also normalizes the XML.
1121     def add_pretty_printing!
1122         normalize!
1123         real_add_pretty_printing!
1124         normalize!
1125     end
1126     
1127     def real_add_pretty_printing!(indent = "")
1128         return if @contents.empty?
1129         each{|c|
1130             if c.is_a? XML
1131                 c.real_add_pretty_printing!(indent+"  ")
1132             elsif c.is_a? String
1133                 c.gsub!(/\n\s*/, "\n#{indent}  ")
1134             end
1135         }
1136         @contents = @contents.inject([]){|children, c| children + ["\n#{indent}  ", c]}+["\n#{indent}"]
1137     end
1138 
1139     protected :real_add_pretty_printing!
1140 
1141     alias_method :raw_dup, :dup
1142     # This is not a trivial method - first it does a *deep* copy,
1143     # second it takes a block which is instance_eval'ed,
1144     # so you can do things like:
1145     # * node.dup{ @name = :foo }
1146     # * node.dup{ self[:color] = "blue" }
1147     def dup(&blk)
1148         new_obj = self.raw_dup
1149         # Attr values stay shared - ugly
1150         new_obj.attrs = new_obj.attrs.dup
1151         new_obj.contents = new_obj.contents.map{|c| c.dup}
1152         
1153         new_obj.instance_eval(&blk) if blk
1154         return new_obj
1155     end
1156 
1157 
1158     # Add some String children (all attributes get to_s'ed)
1159     def text!(*args)
1160         args.each{|s| self << s.to_s}
1161     end
1162     # Add XML child
1163     def xml!(*args, &blk)
1164         @contents << XML.new(*args, &blk)
1165     end
1166 
1167     alias_method :add!, :<<
1168     
1169     # Normalization means joining strings
1170     # and getting rid of ""s, recursively
1171     def normalize!
1172         new_contents = []
1173         @contents.each{|c|
1174             if c.is_a? String
1175                 next if c == ""
1176                 if new_contents[-1].is_a? String
1177                     new_contents[-1] += c
1178                     next
1179                 end
1180             else
1181                 c.normalize!
1182             end
1183             new_contents.push c
1184         }
1185         @contents = new_contents
1186     end
1187 
1188     # Return text below the node, stripping all XML tags,
1189     # "<foo>Hello, <bar>world</bar>!</foo>".xml_parse.text
1190     # returns "Hello, world!"
1191     def text
1192         res = ""
1193         @contents.each{|c|
1194             if c.is_a? XML
1195                 res << c.text
1196             elsif c.is_a? String
1197                 res << c
1198             end # Ignore XML_PI/XML_Comment
1199         }
1200         res
1201     end
1202 
1203     # Equivalent to node.children(pat, *rest)[0]
1204     # Returns nil if there aren't any matching children
1205     def child(pat=nil, *rest)
1206         children(pat, *rest) {|c|
1207             return c
1208         }
1209         return nil
1210     end
1211 
1212     # Equivalent to node.descendants(pat, *rest)[0]
1213     # Returns nil if there aren't any matching descendants
1214     def descendant(pat=nil, *rest)
1215         descendants(pat, *rest) {|c|
1216             return c
1217         }
1218         return nil
1219     end
1220 
1221     # XML#children(pattern, more_patterns)
1222     # Return all children of a node with tags matching tag.
1223     # Also:
1224     # * children(:a, :b) == children(:a).children(:b)
1225     # * children(:a, :*, :c) == children(:a).descendants(:c)
1226     def children(pat=nil, *rest, &blk)
1227         return descendants(*rest, &blk) if pat == :*
1228         res = []
1229         @contents.each{|c|
1230             if pat.nil? or pat === c
1231                 if rest == []
1232                     res << c
1233                     yield c if block_given?
1234                 else
1235                     res += c.children(*rest, &blk)
1236                 end
1237             end
1238         }
1239         res
1240     end
1241     
1242     # * XML#descendants
1243     # * XML#descendants(pattern)
1244     # * XML#descendants(pattern, more_patterns)
1245     #
1246     # Return all descendants of a node matching the pattern.
1247     # If pattern==nil, simply return all descendants.
1248     # Optionally run a block on each of them if a block was given.
1249     # If pattern==nil, also match Strings !
1250     def descendants(pat=nil, *rest, &blk)
1251         res = []
1252         @contents.each{|c|
1253             if pat.nil? or pat === c
1254                 if rest == []
1255                     res << c
1256                     yield c if block_given?
1257                 else
1258                     res += c.children(*rest, &blk)
1259                 end
1260             end
1261             if c.is_a? XML
1262                 res += c.descendants(pat, *rest, &blk)
1263             end
1264         }
1265         res
1266     end
1267     
1268     # Change elements based on pattern
1269     def deep_map(pat, &blk)
1270         if self =~ pat
1271             yield self
1272         else
1273             r = XML.new(self.name, self.attrs)
1274             each{|c|
1275                 if c.is_a? XML
1276                     r << c.deep_map(pat, &blk)
1277                 else
1278                     r << c
1279                 end
1280             }
1281             r
1282         end
1283     end
1284 
1285     # FIXME: do we want a shallow or a deep copy here ?
1286     # Map children, but leave the name/attributes
1287     def map(pat=nil)
1288         r = XML.new(self.name, self.attrs)
1289         each{|c|
1290             if !pat || c =~ pat
1291                 r << yield(c)
1292             else
1293                 r << c
1294             end
1295         }
1296         r
1297     end
1298 end
1299 
1300 # FIXME: Is this even sane ?
1301 # * What about escaping and all that stuff ?
1302 # * Rest of the code assumes that everything is either XML or String
class XML_PI
    # c and t are stored as given and written out back to back by to_s.
    def initialize(c, t)
        @c, @t = c, t
    end

    # Serializes as <?...?>; nothing is escaped.
    def to_s
        "<?" + @c.to_s + @t.to_s + "?>"
    end
end
1312 
1313 # FIXME: Is this even sane ?
1314 # * What about escaping and all that stuff ?
1315 # * Rest of the code assumes that everything is either XML or String
1316 # * There are some limitations on where one can put -s in the comment. Do not overdo.
class XML_Comment
    # c is the comment body, stored as given.
    def initialize(c)
        @c = c
    end

    # Serializes as <!--...-->; nothing is escaped.
    def to_s
        "<!--" + @c.to_s + "-->"
    end
end
1325 
1326 # Syntactic sugar for XML.new
def xml(*args, &blk)
    # Forwards all arguments and the block to the XML constructor.
    XML.new(*args, &blk)
end
1330 
1331 # xml! in XML { ... } - context adds node to parent
1332 # xml! in main context prints the argument (and returns it anyway)
def xml!(*args, &blk)
    # Build the node, print it, and hand it back to the caller.
    built = xml(*args, &blk)
    print(built)
    built
end
1338 
1339 # Perl 6 is supposed to have native support for something like that.
1340 # Constructor takes multiple patterns. The object matches if they all match.
1341 #
1342 # Usage:
1343 #  case foo
1344 #  when all(:foo, {:color => 'blue'}, /Hello/)
1345 #       print foo
1346 #  end
class Patterns_all
    # Remembers the patterns to be checked by ===.
    def initialize(*patterns)
        @patterns = patterns
    end

    # true when every stored pattern === obj (also true with no patterns).
    def ===(obj)
        @patterns.each do |pattern|
            return false unless pattern === obj
        end
        true
    end
end
1355 
def all(*patterns)
    # Shorthand constructor for a pattern that requires every given pattern.
    Patterns_all.new(*patterns)
end
1359 
1360 # Perl 6 is supposed to have native support for something like that.
 1361 # Constructor takes multiple patterns. The object matches if any of them matches.
1362 #
1363 # Usage:
1364 #  case foo
1365 #  when all(:foo, any({:color => 'blue'}, {:color => 'red'}), /Hello/)
1366 #       print foo
1367 #  end
class Patterns_any
    # Remembers the patterns to be checked by ===.
    def initialize(*patterns)
        @patterns = patterns
    end

    # true when at least one stored pattern === obj (false with no patterns).
    def ===(obj)
        @patterns.each do |pattern|
            return true if pattern === obj
        end
        false
    end
end
1376 
def any(*patterns)
    # Shorthand constructor for a pattern that accepts any of the given patterns.
    Patterns_any.new(*patterns)
end

Generated using the rcov code coverage analysis tool for Ruby version 0.8.0.

Valid XHTML 1.0! Valid CSS!