Add xmlpp.pl from http://software.decisionsoft.com/tools.html

author mike <mike>

Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)

committer mike <mike>

Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)
author mike <mike>
Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)
committer mike <mike>
Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)
diff --git a/test/regression/README b/test/regression/README

index 83d384e..fe38ee2 100644 (file)
--- a/test/regression/README
+++ b/test/regression/README
@@ -1,4 +1,4 @@
-$Id: README,v 1.1 2002-11-03 16:53:04 mike Exp $
+$Id: README,v 1.2 2002-11-19 12:11:48 mike Exp $
  
  "queries.raw" is the file of test queries as provided by Rob.
  "mktests" parses the raw file into sections and individual queries
@@ -26,6 +26,9 @@ against its results, do this:
         ./runtests CQLParser sgmlnorm
  
  (Except that sgmlnorm is useless -- gotta find something better.)
+(That something better is xmlpp from DecisionSoft, found at
+       http://software.decisionsoft.com/tools.html
+which I will probably distribute along with cql-java.)
  
  Also: there's a nasty hack here called "showtest" which, when run like
  ``./showtest 07/03'', will show you the ways in which my output
diff --git a/test/regression/xmlpp.pl b/test/regression/xmlpp.pl

new file mode 100755 (executable)

index 0000000..a4400c4
--- /dev/null
+++ b/test/regression/xmlpp.pl
@@ -0,0 +1,403 @@
+#!/usr/bin/perl  -w
+
+#
+#  Copyright (c) 2002, DecisionSoft Limited All rights reserved.
+#  Please see: 
+#  http://software.decisionsoft.com/license.html
+#  for more information.
+# 
+
+# $Revision: 1.1 $
+#
+# xmlpp: XML pretty printing
+#
+
+# For custom attribute sorting create an attributeOrdering.txt file that
+# lists each attributes separated by a newline in the order you would like
+# them to be sorted separated by a newline. Then use the -s option.
+
+use FileHandle;
+use Fcntl;
+use Getopt::Std;
+
+use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n);
+
+my $indent=0;
+my $textContent='';
+my $lastTag=undef;
+my $output;
+my $inAnnotation = 0;
+
+
+if (!getopts('nzhHsteSc') or $opt_h) {
+    usage();
+}
+
+if ($opt_s){
+
+# expect to find attributeOrdering.txt file in same directory
+# as xmlpp is being run from
+    
+  my $scriptDir = $0;
+  if ($scriptDir =~ m#/#){
+    $scriptDir =~ s#/[^/]+$##;
+  }
+  else{
+    $scriptDir =".";
+  }
+    
+  # get attribute ordering from external file
+  if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) {
+    @sortlist = <SORTLIST>;
+    chomp @sortlist;
+    close (SORTLIST);
+    @specialSort = grep(/^\w+/, @sortlist);
+  } 
+  else {      
+   print STDERR  "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n";
+  }
+}
+
+
+# set line separator to ">" speeding up parsing of XML files
+# with no line breaks 
+
+$/ = ">";
+
+
+my $sortAttributes = $opt_s;
+my $newLineComments = $opt_c;
+my $splitAttributes = $opt_t;
+my $schemaHackMode = $opt_S;
+my $normaliseWhiteSpace = $opt_n;
+
+my $filename = $ARGV[0];
+if ($opt_z && (!$filename or $filename eq '-')) {
+    print STDERR "Error: I can't edit STDIN in place.\n";
+    usage();
+}
+
+if (!$opt_z && scalar(@ARGV) > 1) {
+    print STDERR "Warning: Multiple files specified without -z option\n"; 
+}
+
+my $fh;
+
+my $stdin;
+
+if (!$filename or $filename eq '-') {
+    $fh=*STDIN;
+    $stdin=1;
+} else {
+    $fh = open_next_file() or exit(1);
+    $stdin=0;
+}
+
+do {
+    $indent=0;
+    $textContent='';
+    $lastTag=undef;
+    $output = '';
+    my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";
+    my $re_attr = "(?:'[^']*'|\"[^\"]*\")";
+    my $input;
+
+    while ($input .= <$fh>) {
+        while ($input) {
+            if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {
+                my %attr;
+                my ($name,$attr,$selfclose) = ($1,$2,$3);
+                while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {
+                    my ($name,$value) = ($1,$2);
+                    $value =~ s/^["'](.*)["']$/$1/s;
+                    $attr{$name} = $value;
+                }
+                if ($opt_e) {
+                    parseStart($name, 0, %attr);
+                    if ($selfclose) { parseEnd($name) }
+                } else {
+                    parseStart($name, $selfclose, %attr);
+                }
+            } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {
+                parseEnd($1);
+            } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) { 
+                parseComment($1);
+            } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {
+                parseDefault($1);
+            } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {
+                parsePI("$1\n");
+            } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {
+                parseDoctype("$1");
+            } else {
+                last;
+            }
+        }
+        if (eof($fh)) {
+            last;
+        }
+    }
+
+
+    if ($input) {
+        $input =~ m/([^\n]+)/gs;
+        print STDERR "WARNING: junk remaining on input: $1\n";
+    }
+    $fh->close();
+
+    if (!$opt_z) {
+        if(!$opt_H){ 
+            print "$output\n"
+        } else {
+            print html_escape($output)."\n"
+        }
+    } else {
+        if ($input) { 
+            print STDERR "Not overwriting file\n";
+        } else {
+            open FOUT,"> $filename" or die "Cannot overwrite file: $!";
+            if(!$opt_H){
+                print FOUT "$output\n"
+            } else {
+                print FOUT html_escape($output)."\n"
+            }
+            close FOUT
+        }
+    }
+} while (
+    !$stdin && $opt_z && ($fh = open_next_file(\$filename))
+  );
+  
+
+
+sub parseStart {
+    my $s = shift;
+    my $selfclose = shift;
+    my %attr = @_;
+
+    $textContent =~ s/\s+$//; 
+    printContent($textContent);
+
+    if($inAnnotation) {
+        return;
+    }
+
+    if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
+        $inAnnotation = 1;
+        $textContent = '';
+        $lastTag = 1;
+        return;
+    }
+    if (length($output)) {
+        $output .= "\n";
+    }
+
+    $output .= "  " x $indent;
+    $output .= "<$s";
+    my @k = keys %attr;
+
+    if ($sortAttributes && (scalar(@k) > 1) ){
+
+      my @alphaSorted;
+      my @needSpecialSort;
+      my @final;
+      my $isSpecial;
+
+      # sort attributes alphabetically (default ordering)
+      @alphaSorted = sort @k;
+
+      # read through sorted list, if attribute doesn't have specified
+      # sort order, push it onto the end of the final array (this maintains
+      # alphabetic order). Else create a list that has attributes needing
+      # special ordering.
+      foreach $attribute (@alphaSorted){
+        $isSpecial = 0;
+        foreach $sortAttrib (@specialSort){
+          if ($attribute eq $sortAttrib){
+            push @needSpecialSort, $attribute;
+            $isSpecial = 1;
+          }
+        }
+        if (!$isSpecial){
+          push @final, $attribute;
+        }
+      }
+
+      # now read through the specialSort list backwards looking for
+      # any match in the needSpecialSort list. Unshift this onto the 
+      # front of the final array to maintain proper order.
+      foreach my $attribute (reverse @specialSort){
+        foreach (@needSpecialSort){
+          if ($attribute eq $_){
+            unshift @final, $attribute;
+          }
+        }
+      }
+
+      @k = @final;
+    }
+
+    foreach my $attr (@k) {
+        # 
+        # Remove (min|max)Occurs = 1 if schemaHackMode
+        #
+        if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
+            next;
+        }
+
+        if ($splitAttributes) {
+            $output .= "\n"."  " x $indent." ";
+        }
+        if ($attr{$attr} =~ /'/) {
+            $output .= " $attr=\"$attr{$attr}\"";
+        } else {
+            $output .= " $attr='$attr{$attr}'";
+        }
+    }
+    if ($splitAttributes and @k) {
+        $output .= "\n"."  " x $indent;
+    }
+    if ($selfclose) {
+        $output .= " />";
+        $lastTag = 0;
+    } else {
+        $output .= ">";
+        $indent++;
+        $lastTag = 1;
+    }
+    $textContent = '';
+}
+
+sub parseEnd {
+    my $s = shift;
+
+    if($inAnnotation) {
+        if($s =~ m/(^|:)annotation$/) {
+            $inAnnotation = 0;
+        }
+        return;
+    }
+
+    if($normaliseWhiteSpace) {
+        $textContent =~ s/^\s*(.*?)\s*$/$1/;
+    }
+    $indent--;
+    printContent($textContent);
+    if ($lastTag == 0) {
+        $output .= "\n";
+        $output .= "  " x $indent;
+    } 
+    $output .= "</$s>";
+    $textContent = '';
+    $lastTag = 0;
+}
+
+sub parseDefault {
+    my $s = shift;
+    if($inAnnotation) { return }
+    $textContent .= "$s";
+}
+
+sub parsePI {
+    my $s = shift;
+    $output .= "$s";
+}
+
+sub parseDoctype {
+    my $s = shift;
+    if ($s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
+      $start = $1;
+      $DTD = $2;
+      $finish = $3;
+      $DTD =~ s/\</\n  \</msg;
+      $output .= "$start$DTD\n$finish\n";
+    } else {
+      $output .= "$s";
+    }
+}
+
+sub parseComment {
+    my $s = shift; 
+    if($inAnnotation) { return }
+    printContent($textContent,1);
+    if ($s =~ /([^\<]*)(<.*>)(.*)/ms) {
+      $start = $1;
+      $xml = $2;
+      $finish = $3;
+      $xml =~ s/\</\n\</msg;
+      $xml =~ s/(\n\s*\n?)+/\n/msg;
+      $xml =~ s/^\s*//msg;
+      $xml =~ s/\s*$//msg;
+      $s = "$start\n$xml\n$finish";
+    }
+    $s =~ s/\n\s*$/\n  /msg;
+    if ($newLineComments) {
+        $output .= "\n<!--$s-->\n";
+    } else {
+        $output .= "<!--$s-->";
+    }
+    $textContent='';
+}
+
+sub printContent {
+    my $s = shift;
+    my $printLF = shift;
+    my ($LF,$ret) = ("","");
+
+    if ($s =~ m/\n\s*$/) {
+        $LF = "\n"; 
+    }
+    if ($s =~ m/^[\s\n]*$/) {
+        $ret = undef;
+    } else {
+        $output .= "$s";
+        $ret = 1;
+    }
+    if ($printLF) {
+        $output .= $LF;
+    }
+}
+
+
+sub html_escape {
+    my $s = shift;
+    $s =~ s/&/&amp;/gsm;
+    $s =~ s/</&lt;/gsm;
+     $s =~ s/>/&gt;/gsm;
+    return $s;
+}
+
+sub open_next_file {
+    my $filename = shift;
+    $$filename = shift @ARGV;
+    while ($$filename and ! -f $$filename) {
+        print STDERR "WARNING: Could not find file: $$filename\n";
+        $$filename = shift @ARGV;
+    }
+    if(!$$filename) {
+        return undef;
+    }
+    my $fh = new FileHandle;
+    $fh->open("< $$filename") or die "Can't open $$filename: $!";
+    return $fh;
+}
+
+sub usage {
+    print STDERR <<EOF;
+usage: $0 [ options ] [ file.xml ... ]
+
+options:
+  -h  display this help message
+  -H  escape characters (useful for further processing)
+  -t  split attributes, one per line (useful for diff)
+  -s  sort attributes (useful for diff)
+  -z  in place edit (zap)
+  -e  expand self closing tags (useful for diff)
+  -S  schema hack mode (used by xmldiff)
+  -c  place comments on new line.
+  -n  normalise whitespace (remove leading and trailing whitespace from nodes
+      with text content.
+
+EOF
+    exit 1;
+}
+
author	mike <mike>
	Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)
committer	mike <mike>
	Tue, 19 Nov 2002 12:11:48 +0000 (12:11 +0000)
test/regression/README		patch \| blob \| history
test/regression/xmlpp.pl	[new file with mode: 0755]	patch \| blob