All Downloads are FREE. Search and download functionalities are using the official Maven repository.

util.diff_xml_files.py Maven / Gradle / Ivy

The newest version!
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import getopt, glob, os, sys
 
def main(argv):
    f1 = ""
    f2 = ""
     
    # Get the base folder
    try:
        opts, args = getopt.getopt(argv, "h", ["f1=", "f2="])
    except getopt.GetoptError:
        print 'The file options for build_saxon_collection_xml.py were not correctly specified.'
        print 'To see a full list of options try:'
        print '  $ python build_saxon_collection_xml.py -h'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'Options:'
            print '    -f        The base folder to create collection XML file.'
            sys.exit()
        elif opt in ('--f1'):
            # check if file exists.
            if os.path.exists(arg):
                f1 = arg
            else:
                print 'Error: Argument must be a file name for --f1.'
                sys.exit()
        elif opt in ('--f2'):
            # check if file exists.
            if os.path.exists(arg):
                f2 = arg
            else:
                print 'Error: Argument must be a file name for --f2.'
                sys.exit()

    # Required fields to run the script.
    if f1 == "" or not os.path.exists(f1):
        print 'Error: The file path option must be supplied:  --f1.'
        sys.exit()
    if f2 == "" or not os.path.exists(f2):
        print 'Error: The file path option must be supplied:  --f2.'
        sys.exit()
      
    missing_in_f1 = []
    missing_in_f2 = []
    found_in_both = []
    
    with open(f1) as f:
        content_f1 = f.readlines()
    set_f1 = set(content_f1)
    
    
    with open(f2) as f:
        content_f2 = f.readlines()
    set_f2 = set(content_f2)
    
    missing_in_f1 = set_f2.difference(set_f1)
    missing_in_f2 = set_f1.difference(set_f2)
    found_in_both = set_f1.intersection(set_f2)
    
    print ""
    print "Missing files in " + f1
    for f1_name in missing_in_f1:
        print " + " + f1_name.strip()

    print ""
    print "Missing files in " + f2
    for f2_name in missing_in_f2:
        print " + " + f2_name.strip()
    
    offset = 40
    print ""
    print "XML Summary"
    print (" - Found in both:").ljust(offset) + str(len(found_in_both))
    print (" - " + f1 + " diff set vs list:").ljust(offset) + str(len(content_f1) - len(set_f1))
    print (" - " + f2 + " diff set vs list:").ljust(offset) + str(len(content_f2) - len(set_f2))
    print (" - " + f1 + " missing:").ljust(offset) + str(len(missing_in_f1))
    print (" - " + f2 + " missing:").ljust(offset) + str(len(missing_in_f2))
    

if __name__ == "__main__":
    main(sys.argv[1:])




© 2015 - 2025 Weber Informatics LLC | Privacy Policy