Difference between two XML files
Facts - XSLT
Wednesday, 28 November 2007 22:06

I have an XSLT stylesheet that computes the difference between two XML files; see below. I derived it from Oliver Becker's stylesheet for merging two XML files. The license is LGPL. It uses an auxiliary input file whose elements name the two input XML files.

My customers always want a tool for regression testing, which often comes down to comparing two XML files. Recently I came across a better tool from Rémi Peyronnet. He even provides a GUI for investigating the output. The tool also takes advantage of ids in the XML files by matching records that have the same name and the same id element or id attribute. In my case these ids were child elements of a container element.

My command line for Rémi's tool is:

xmldiff diff --ids id order_orig.xml order_changed.xml output.xml

In case the id is an attribute of a container element, your command line needs to be as follows:

xmldiff diff --ids '@id' order_orig.xml order_changed.xml output.xml

My XSLT for computing the difference between two xml's (output is html):

<?xml version="1.0"?>
 
<xslt:transform version="1.0"
                xmlns:xslt="http://www.w3.org/1999/XSL/Transform"
                xmlns:m="http://factsandpeople.com/xsltdiff"
                exclude-result-prefixes="m">
 
<!-- Normalize the contents of text, comment, and processing-instruction
     nodes before comparing?
     When set to 'yes', m:compare-nodes also treats two such nodes as
     equal if their normalize-space() values are equal.
     Default: yes -->
<xslt:param name="normalize" select="'yes'" />
 
<!-- Do not diff elements with this (qualified) name: m:compare-nodes
     reports an element whose name() equals this value as different
     ('!') from every other node.
     Default: empty, which excludes no element -->
<xslt:param name="dontmerge" />
 
<!-- If set to true, text nodes in file1 will be replaced.
     NOTE(review): no template in this stylesheet reads $replace;
     presumably it is a leftover from the merge stylesheet this one
     was derived from. Confirm before relying on it. -->
<xslt:param name="replace" select="false()" />
 
<!-- Variant 1: the source document is a small driver file that names
     the two documents to compare, using the namespace bound to the
     "m" prefix of this stylesheet:
     <?xml version="1.0"?>
     <merge xmlns="http://factsandpeople.com/xsltdiff">
        <file1>file1.xml</file1>
        <file2>file2.xml</file2>
     </merge>
     file1 supplies the "Test Output" columns and file2 the "Expected"
     columns. Both names are resolved against the base URI of the
     driver document's root element. The result is a single HTML table
     with one row per reported difference. The run is aborted when
     either file name is empty.
-->
<xslt:template match="m:merge">
   <xslt:variable name="testOutputUri" select="string(m:file1)" />
   <xslt:variable name="expectedUri" select="string(m:file2)" />

   <table align="center" border="1">
      <tr>
         <th>Test Output Field Name</th>
         <th>Test Status</th>
         <th>Test Output Attributes</th>
         <th>Test Output Values</th>
         <th>Expected Field Name</th>
         <th>Expected Attributes</th>
         <th>Expected Value</th>
      </tr>

      <!-- Progress message on the processor's message channel -->
      <xslt:message>
         <xslt:value-of select="concat(&quot;Diffing '&quot;, $testOutputUri,
                                       &quot;' and '&quot;, $expectedUri, &quot;'&quot;)" />
      </xslt:message>

      <!-- Both file names are mandatory -->
      <xslt:if test="not($testOutputUri != '' and $expectedUri != '')">
         <xslt:message terminate="yes">
            <xslt:text>No files to diff specified</xslt:text>
         </xslt:message>
      </xslt:if>

      <!-- Start the recursive diff at the top-level nodes of both files -->
      <xslt:call-template name="m:merge">
         <xslt:with-param name="nodes1"
                          select="document($testOutputUri, /*)/node()" />
         <xslt:with-param name="nodes2"
                          select="document($expectedUri, /*)/node()" />
      </xslt:call-template>
   </table>
</xslt:template>
<!-- end of variant 1 -->
 
<!-- Variant 2:
     The source document itself is the test output; it is diffed
     against the expected-results document named by the stylesheet
     parameter "with". The file name is resolved against the base URI
     of the source document's root element. The run is aborted when
     "with" is empty.
-->
<xslt:param name="with" />

<xslt:template match="*">
   <table align="center" border="1">
      <tr>
         <th>Test Output Field Name</th>
         <th>Test Status</th>
         <th>Test Output Attributes</th>
         <th>Test Output Values</th>
         <th>Expected Field Name</th>
         <th>Expected Attributes</th>
         <th>Expected Value</th>
      </tr>

      <!-- Progress message on the processor's message channel -->
      <xslt:message>
         <xslt:value-of select="concat(&quot;Diffing input with '&quot;, $with, &quot;'&quot;)" />
      </xslt:message>

      <!-- The comparison document is mandatory -->
      <xslt:if test="string-length($with) = 0">
         <xslt:message terminate="yes">
            <xslt:text>No input file specified (parameter 'with')</xslt:text>
         </xslt:message>
      </xslt:if>

      <!-- Diff the top-level nodes of the source document with those of
           the document given by "with" -->
      <xslt:call-template name="m:merge">
         <xslt:with-param name="nodes1" select="/node()" />
         <xslt:with-param name="nodes2" select="document($with, /*)/node()" />
      </xslt:call-template>
   </table>
</xslt:template>
<!-- end of variant 2 -->
 
<!-- ============================================================== -->

<!-- The "diff" template.
     Recursively compares two sibling node lists and emits one HTML
     table row (through m:make-table-row) per reported difference.

     Parameters:
       nodes1   nodes taken from the test output document
       nodes2   nodes taken from the expected results document

     The first node of each list is classified by m:compare-nodes:
       '='            the heads match: for elements the children are
                      diffed, then the remaining siblings
       'c', 'a', 't'  content, attributes or node type differ: a row is
                      emitted, then the remaining siblings are diffed
                      (children of an element whose attributes differ
                      are not compared)
       '!'            the heads are unrelated: $first1 is searched for
                      further along $nodes2 to re-synchronise the lists
-->
<xslt:template name="m:merge">
   <xslt:param name="nodes1" />
   <xslt:param name="nodes2" />

   <xslt:choose>

      <!-- Is $nodes1 resp. $nodes2 empty? -->

      <!-- The missing side is passed as the empty node-set "/.." and not
           as a string: m:make-table-row hands both parameters to
           xsl:apply-templates, whose select must be a node-set. -->
      <xslt:when test="count($nodes1)=0 and count($nodes2)!=0">
         <xslt:call-template name="m:make-table-row">
            <xslt:with-param name="node1" select="/.." />
            <xslt:with-param name="node2" select="$nodes2" />
            <xslt:with-param name="description" select="'No Test Output found'" />
            <xslt:with-param name="case" select="'no1node'" />
         </xslt:call-template>
      </xslt:when>
      <xslt:when test="count($nodes1)!=0 and count($nodes2)=0">
         <xslt:call-template name="m:make-table-row">
            <xslt:with-param name="node1" select="$nodes1" />
            <xslt:with-param name="node2" select="/.." />
            <xslt:with-param name="description" select="'No Expected Results found'" />
            <xslt:with-param name="case" select="'no2node'" />
         </xslt:call-template>
      </xslt:when>

      <!-- Both lists exhausted: nothing to report, recursion ends here -->
      <xslt:when test="count($nodes1)=0 and count($nodes2)=0">
      </xslt:when>

      <xslt:otherwise>
         <!-- Split $nodes1 and $nodes2 into head and tail -->
         <xslt:variable name="first1" select="$nodes1[1]" />
         <xslt:variable name="rest1" select="$nodes1[position()!=1]" />
         <xslt:variable name="first2" select="$nodes2[1]" />
         <xslt:variable name="rest2" select="$nodes2[position()!=1]" />
         <!-- Determine type of node $first1 -->
         <xslt:variable name="type1">
            <xslt:apply-templates mode="m:detect-type" select="$first1" />
         </xslt:variable>

         <!-- Compare $first1 and $first2; yields one result character -->
         <xslt:variable name="diff-first">
            <xslt:call-template name="m:compare-nodes">
               <xslt:with-param name="node1" select="$first1" />
               <xslt:with-param name="node2" select="$first2" />
            </xslt:call-template>
         </xslt:variable>

         <xslt:choose>

            <!-- contents($first1) != contents($first2) -->
            <xslt:when test="$diff-first='c'">
               <xslt:call-template name="m:make-table-row">
                  <xslt:with-param name="node1" select="$first1" />
                  <xslt:with-param name="node2" select="$first2" />
                  <xslt:with-param name="description" select="'Content Different'" />
                  <xslt:with-param name="case" select="'default'" />
               </xslt:call-template>
               <!-- Keep going, so that differences in later siblings are
                    reported as well -->
               <xslt:call-template name="m:merge">
                  <xslt:with-param name="nodes1" select="$rest1" />
                  <xslt:with-param name="nodes2" select="$rest2" />
               </xslt:call-template>
            </xslt:when>

            <!-- attributes($first1) != attributes($first2) -->
            <xslt:when test="$diff-first='a'">
               <xslt:call-template name="m:make-table-row">
                  <xslt:with-param name="node1" select="$first1" />
                  <xslt:with-param name="node2" select="$first2" />
                  <xslt:with-param name="description" select="'Attributes Different'" />
                  <xslt:with-param name="case" select="'default'" />
               </xslt:call-template>
               <!-- Keep going, so that differences in later siblings are
                    reported as well -->
               <xslt:call-template name="m:merge">
                  <xslt:with-param name="nodes1" select="$rest1" />
                  <xslt:with-param name="nodes2" select="$rest2" />
               </xslt:call-template>
            </xslt:when>

            <!-- type($first1) != type($first2) -->
            <xslt:when test="$diff-first='t'">
               <xslt:call-template name="m:make-table-row">
                  <xslt:with-param name="node1" select="$first1" />
                  <xslt:with-param name="node2" select="$first2" />
                  <xslt:with-param name="description" select="'XSLT Type Different'" />
                  <xslt:with-param name="case" select="'default'" />
               </xslt:call-template>
               <!-- Keep going, so that differences in later siblings are
                    reported as well -->
               <xslt:call-template name="m:merge">
                  <xslt:with-param name="nodes1" select="$rest1" />
                  <xslt:with-param name="nodes2" select="$rest2" />
               </xslt:call-template>
            </xslt:when>

            <!-- $first1 != $first2, for elements go deeper in the tree -->
            <xslt:when test="$diff-first='!'">
               <!-- Compare $first1 with every node of $rest2; the result
                    is a string holding one character per node -->
               <xslt:variable name="diff-rest">
                  <xslt:for-each select="$rest2">
                     <xslt:call-template name="m:compare-nodes">
                        <xslt:with-param name="node1" select="$first1" />
                        <xslt:with-param name="node2" select="." />
                     </xslt:call-template>
                  </xslt:for-each>
               </xslt:variable>

               <!-- Determine here whether the test results are in a next node (on the same level)
                    of the Expected Results. If this is true then output the fact that the expected
                    result $first2 is missing. If not then output that the test output $first1 can not
                    be found in the expected results.  -->
               <xslt:choose>
                  <!-- $first1 is in $rest2 and
                       $first1 is *not* an empty text node  -->
                  <xslt:when test="contains($diff-rest,'=') and
                                      not($type1='text' and
                                          normalize-space($first1)='')">
                     <!-- output the fact that the expected result $first2 is missing -->

                     <!-- determine position of $first1 in $nodes2: the
                          number of result characters before the first '='
                          is its offset in $rest2, +1 for a 1-based index
                          and +1 because $rest2 starts at $nodes2[2] -->
                     <xslt:variable name="pos"
                           select="string-length(substring-before(
                                                $diff-rest,'=')) + 2" />
                     <!-- <xslt:copy-of
                           select="$nodes2[position() < $pos]" /> -->

                     <xslt:call-template name="m:make-table-row">
                        <xslt:with-param name="node1" select="$first1" />
                        <xslt:with-param name="node2" select="$first2" />
                        <xslt:with-param name="description" select="''" />
                        <xslt:with-param name="case" select="'1missing'" />
                     </xslt:call-template>

                     <!-- diff $first1 with its equivalent node -->
                     <xslt:choose>
                        <!-- Elements: go deeper in the tree -->
                        <xslt:when test="$type1='element'">
                           <xslt:call-template name="m:merge">
                              <xslt:with-param name="nodes1"
                                    select="$first1/node()" />
                              <xslt:with-param name="nodes2"
                                    select="$nodes2[position()=$pos]/node()" />
                           </xslt:call-template>
                        </xslt:when>
                     </xslt:choose>

                     <!-- Diff $rest1 and rest of $nodes2 -->
                     <xslt:call-template name="m:merge">
                        <xslt:with-param name="nodes1" select="$rest1" />
                        <xslt:with-param name="nodes2"
                              select="$nodes2[position() > $pos]" />
                     </xslt:call-template>
                  </xslt:when>

                  <!-- else: $first1 is not in $rest2 or
                       $first1 is an empty text node -->
                  <xslt:otherwise>
                     <!-- If not then output that the test output $first1 can not
                          be found in the expected results.  -->

                     <xslt:call-template name="m:make-table-row">
                        <xslt:with-param name="node1" select="$first1" />
                        <xslt:with-param name="node2" select="$first2" />
                        <xslt:with-param name="description" select="''" />
                        <xslt:with-param name="case" select="'2missing'" />
                     </xslt:call-template>

                     <!-- Skip $first1 only; $nodes2 is kept whole -->
                     <xslt:call-template name="m:merge">
                        <xslt:with-param name="nodes1" select="$rest1" />
                        <xslt:with-param name="nodes2" select="$nodes2" />
                     </xslt:call-template>
                  </xslt:otherwise>
               </xslt:choose>
            </xslt:when>

            <!-- else: $first1 = $first2 -->
            <xslt:otherwise>
               <xslt:choose>
                  <!-- Elements: diff the children -->
                  <xslt:when test="$type1='element'">
                     <xslt:call-template name="m:merge">
                        <xslt:with-param name="nodes1"
                              select="$first1/node()" />
                        <xslt:with-param name="nodes2"
                              select="$first2/node()" />
                     </xslt:call-template>
                  </xslt:when>
               </xslt:choose>

               <!-- Diff $rest1 and $rest2 -->
               <xslt:call-template name="m:merge">
                  <xslt:with-param name="nodes1" select="$rest1" />
                  <xslt:with-param name="nodes2" select="$rest2" />
               </xslt:call-template>
            </xslt:otherwise>
         </xslt:choose>
      </xslt:otherwise>
   </xslt:choose>
</xslt:template>
<!-- end of template m:merge -->
 
<!-- Comparing single nodes.
     Parameters:
       node1, node2   the two nodes to compare
     The template creates a text node holding exactly one character:
       '='  the nodes are equivalent: elements with the same expanded
            name and the same attributes, or non-element nodes of the
            same type and name with equal (optionally whitespace
            normalized, see $normalize) content
       'a'  elements with the same expanded name but different attributes
       'c'  non-element nodes of the same type and name, different content
       't'  non-element nodes of different types but with the same name
       '!'  anything else, including every element named $dontmerge
-->
<xslt:template name="m:compare-nodes">
   <xslt:param name="node1" />
   <xslt:param name="node2" />
   <xslt:variable name="type1">
      <xslt:apply-templates mode="m:detect-type" select="$node1" />
   </xslt:variable>
   <xslt:variable name="type2">
      <xslt:apply-templates mode="m:detect-type" select="$node2" />
   </xslt:variable>

   <xslt:choose>
      <!-- Are $node1 and $node2 element nodes with the same name? -->
      <xslt:when test="$type1='element' and $type2='element' and
                       local-name($node1)=local-name($node2) and
                       namespace-uri($node1)=namespace-uri($node2) and
                       name($node1)!=$dontmerge and name($node2)!=$dontmerge">
         <!-- Comparing the attributes: one '.' is collected per mismatch -->
         <xslt:variable name="diff-att">
            <!-- same number ... -->
            <xslt:if test="count($node1/@*)!=count($node2/@*)">.</xslt:if>
            <!-- ... and same name/content -->
            <xslt:for-each select="$node1/@*">
               <xslt:if test="not($node2/@*
                        [local-name()=local-name(current()) and
                         namespace-uri()=namespace-uri(current()) and
                         .=current()])">.</xslt:if>
            </xslt:for-each>
         </xslt:variable>
         <xslt:choose>
            <xslt:when test="string-length($diff-att)!=0">a</xslt:when>
            <xslt:otherwise>=</xslt:otherwise>
         </xslt:choose>
      </xslt:when>

      <!-- Other nodes: test for the same type and content -->
      <xslt:when test="$type1!='element' and $type1=$type2 and
                       name($node1)=name($node2) and
                       ($node1=$node2 or
                          ($normalize='yes' and
                           normalize-space($node1)=
                           normalize-space($node2)))">=</xslt:when>

      <!-- Same type and name, but the content differs -->
      <xslt:when test="$type1!='element' and $type1=$type2 and
                       name($node1)=name($node2) and
                       not($node1=$node2 or
                          ($normalize='yes' and
                           normalize-space($node1)=
                           normalize-space($node2)))">c</xslt:when>

      <!-- Two non-element nodes of different types. Both operands must be
           the variables $type1 and $type2: a bare "type2" would be a
           location path selecting child elements named type2. -->
      <xslt:when test="$type1!='element' and $type2!='element' and $type1!=$type2
                       and name($node1)=name($node2)">t</xslt:when>

      <!-- Otherwise: different node types or different name/content -->
      <xslt:otherwise>!</xslt:otherwise>
   </xslt:choose>
</xslt:template>
<!-- end of template m:compare-nodes -->
 
<!-- Emits one <tr> of the difference table. The seven cells follow the
     header row: test output field name, status, test output attributes,
     test output value, expected field name, expected attributes,
     expected value.
     Parameters:
       node1        node(s) from the test output
       node2        node(s) from the expected results
       description  status text; used only by the default layout
       case         selects the row layout:
                      '2missing'  only the test output side is filled in
                      '1missing'  only the expected side is filled in; the
                                  status also shows the "id" child of $node2
                      'no1node'   only the expected field name is shown
                      'no2node'   only the test output field name is shown
                      any other   both sides plus $description
     Field names and attributes are taken from the PARENT of the node
     passed in.
     The parameters are used only in XPath expressions of the selected
     layout, so a side that a layout does not show is never evaluated.
-->
<xslt:template name="m:make-table-row">
   <xslt:param name="node1" />
   <xslt:param name="node2" />
   <xslt:param name="description" />
   <xslt:param name="case" />

   <xslt:choose>

      <xslt:when test="contains($case,'2missing')">
         <tr>
            <td><xslt:value-of select="name($node1/..)"/></td>
            <td>Can not find Test Output for Expected result. More Test Output than Expected Results.</td>
            <td>
               <xslt:call-template name="m:format-attributes">
                  <xslt:with-param name="node" select="$node1" />
               </xslt:call-template>
            </td>
            <td><xslt:value-of select="$node1"/></td>
            <td><xslt:value-of select="name($node2/..)"/></td>
            <td></td>
            <td></td>
         </tr>
      </xslt:when>

      <xslt:when test="contains($case,'1missing')">
         <tr>
            <td><xslt:value-of select="name($node1/..)"/></td>
      <td>Can not find Test Output in Expected Results. More Expected Results than Test Output.
          Id = <xslt:value-of select="$node2/id"/>
          </td>
            <td></td>
            <td></td>
            <td><xslt:value-of select="name($node2/..)"/></td>
            <td>
               <xslt:call-template name="m:format-attributes">
                  <xslt:with-param name="node" select="$node2" />
               </xslt:call-template>
            </td>
            <td><xslt:value-of select="$node2"/></td>
         </tr>
      </xslt:when>

      <xslt:when test="contains($case,'no1node')">
         <tr>
            <td></td>
            <td>No Test Output found</td>
            <td></td>
            <td></td>
            <td><xslt:value-of select="name($node2/..)"/></td>
            <td></td>
            <td></td>
         </tr>
      </xslt:when>

      <xslt:when test="contains($case,'no2node')">
         <tr>
            <td><xslt:value-of select="name($node1/..)"/></td>
            <td>No Expected Results found</td>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
         </tr>
      </xslt:when>

      <xslt:otherwise>
         <tr>
            <td><xslt:value-of select="name($node1/..)"/></td>
            <td><xslt:value-of select="$description"/></td>
            <td>
               <xslt:call-template name="m:format-attributes">
                  <xslt:with-param name="node" select="$node1" />
               </xslt:call-template>
            </td>
            <td><xslt:value-of select="$node1"/></td>
            <td><xslt:value-of select="name($node2/..)"/></td>
            <td>
               <xslt:call-template name="m:format-attributes">
                  <xslt:with-param name="node" select="$node2" />
               </xslt:call-template>
            </td>
            <td><xslt:value-of select="$node2"/></td>
         </tr>
      </xslt:otherwise>

   </xslt:choose>
</xslt:template>
<!-- end of template m:make-table-row -->
 
<!-- Writes the attributes of the PARENT of $node as a flat string, each
     attribute rendered as "local-name=value - " (trailing separator
     included).
     Parameters:
       node   the node(s) whose parent's attributes are listed
-->
<xslt:template name="m:format-attributes">
   <xslt:param name="node" />
   <xslt:for-each select="$node/parent::node()/@*">
      <xslt:value-of select="local-name()" />
      <xslt:text>=</xslt:text>
      <xslt:value-of select="." />
      <xslt:text> - </xslt:text>
   </xslt:for-each>
</xslt:template>
<!-- end of template m:format-attributes -->
 
 
<!-- Type detection, thanks to M. H. Kay.
     Applying templates in mode m:detect-type to a node yields a short
     keyword for its kind: element, text, comment or pi. -->
<xslt:template mode="m:detect-type" match="*">
   <xslt:text>element</xslt:text>
</xslt:template>
<xslt:template mode="m:detect-type" match="text()">
   <xslt:text>text</xslt:text>
</xslt:template>
<xslt:template mode="m:detect-type" match="comment()">
   <xslt:text>comment</xslt:text>
</xslt:template>
<xslt:template mode="m:detect-type" match="processing-instruction()">
   <xslt:text>pi</xslt:text>
</xslt:template>
 
</xslt:transform>
 

The input file:

<?xml version="1.0"?>
<!-- Driver file for the diff stylesheet. The namespace must be the one
     bound to the "m" prefix in the stylesheet, otherwise the
     match="m:merge" template does not apply.
     file1 is the test output, file2 the expected results. -->
<merge xmlns="http://factsandpeople.com/xsltdiff">
  <file1>1file.xml</file1>
  <file2>2file.xml</file2>
</merge>