#! /usr/bin/env python3
#
# For the details of the maven-metadata.xml model see
# https://maven.apache.org/ref/3.2.1/maven-repository-metadata/repository-metadata.html.
#
# (c) 2016 by Sebastian Bauer

from __future__ import print_function

import argparse
import datetime
import glob
import hashlib
import os
import os.path
import re
import sys
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET


def ids(path):
    """Extract the group id and artifact id from the path."""
    group_id, artifact_id = os.path.split(path)
    if group_id.startswith('./'):
        group_id = group_id[2:]
    group_id = group_id.replace('/', '.')
    return group_id, artifact_id


def write_hashes(filename, contents):
    """Write the relevant hash digests of contents next to filename."""
    encoded_contents = str(contents).encode('utf-8')

    m = hashlib.md5()
    m.update(encoded_contents)
    with open(filename + '.md5', 'w') as f:
        f.write(m.hexdigest())

    s = hashlib.sha1()
    s.update(encoded_contents)
    with open(filename + '.sha1', 'w') as f:
        f.write(s.hexdigest())

    s = hashlib.sha256()
    s.update(encoded_contents)
    with open(filename + '.sha256', 'w') as f:
        f.write(s.hexdigest())

    s = hashlib.sha512()
    s.update(encoded_contents)
    with open(filename + '.sha512', 'w') as f:
        f.write(s.hexdigest())


parser = argparse.ArgumentParser(description="""
Updates the maven-metadata.xml files of Maven artifacts in accordance with the
repository contents, starting at the current working directory. The script is
very simple and thus may only work for very simple repositories such as the one
used by the Ontologizer. Make sure to make a backup before using it.
""")
parser.add_argument('--do-it', action='store_true', default=False, help="""
Perform the actual operation. Without this option, no file-writing operation
will actually happen.
""") args = parser.parse_args() do_it = args.do_it utcnow = datetime.datetime.utcnow # Groups contain artifacts that contain version that contains released jars groups = {} # Scan directory and extract jars and where they belong to for root, dir, files in os.walk("."): if '.git' in dir: # Don't enter the .git directory dir.remove('.git') continue for file in [f for f in files if f.endswith('.aar')]: version = os.path.basename(root) other = os.path.dirname(root) group_id, artifact_id = ids(other) if group_id not in groups: groups[group_id] = {} if artifact_id not in groups[group_id]: groups[group_id][artifact_id] = {} if version not in groups[group_id][artifact_id]: groups[group_id][artifact_id][version] = {} groups[group_id][artifact_id][version]["files"] = [] groups[group_id][artifact_id][version]["path"] = root groups[group_id][artifact_id][version]["files"].append(file) # Build maven-metadata.xml for each artifact for group_id, artifacts in groups.items(): for artifact_id, artifact_versions in artifacts.items(): mt = ET.Element("metadata") ET.SubElement(mt,"groupId").text = group_id ET.SubElement(mt,"artifactId").text = artifact_id versioning = ET.SubElement(mt,"versioning") versions = ET.SubElement(versioning,"versions") path = None for version in artifact_versions: if path is None: path = groups[group_id][artifact_id][version]["path"] ET.SubElement(versions,"version").text = version ET.SubElement(versioning, "lastUpdated").text = utcnow().strftime("%Y%m%d%H%M%S") if path is None: sys.exit('Could not find a version for {0}'.format(artifact_id)) # Remove the last part of path (which is a version) path = os.path.dirname(path) filename = os.path.join(path, "maven-metadata.xml") contents = minidom.parseString(ET.tostring(mt)).toprettyxml(indent=" ") if do_it: out = open(filename, mode='w') else: print('Creating file "{0}"'.format(filename),file=sys.stderr) out = sys.stdout out.write(contents) if do_it: out.close() write_hashs(filename, contents) # Build maven-metadata.xml for each individual version of each artifactId for group_id, artifacts in groups.items(): for artifact_id, artifact_versions in artifacts.items(): for version in artifact_versions: files = artifact_versions[version]["files"] path = artifact_versions[version]["path"] # Basically sorts according to the timestamp as the prefix # of the file is all the same files = sorted(files) snapshot_ext = '-SNAPSHOT' is_snapshot = version.endswith(snapshot_ext) mt = ET.Element("metadata") mt.set('modelVersion','1.1.0') ET.SubElement(mt,"groupId").text = group_id ET.SubElement(mt,"artifactId").text = artifact_id ET.SubElement(mt,"version").text = version versioning = ET.SubElement(mt,"versioning") if is_snapshot: # Version without -SNAPHOT suffix plain_version = version.replace(snapshot_ext,"") # Regexp for extracting the timestamp regexp = "{0}-{1}-(\d+.\d+)-.*".format(artifact_id,plain_version) pat = re.compile(regexp) def stamp_of(f): """ Return the time stamp of the given filename or the empty string if no time stamp could be found """ m = pat.match(f) if m: return m.group(1) return "" stamps = [stamp_of(s) for s in files] new_stem = [] build_number = 0 for s in stamps: build_number = build_number + 1 new_stem.append("{0}-{1}-{2}-{3}".format(artifact_id, plain_version, s, build_number)) snapshot = ET.SubElement(versioning,"snapshot") ET.SubElement(snapshot,"timestamp").text = str(stamps[-1]) ET.SubElement(snapshot,"buildNumber").text = str(build_number) snapshotVersions = ET.SubElement(versioning,"snapshotVersions") # Old names 
                old_stem = [os.path.splitext(f)[0] for f in files]

                # Rename all files beginning with the stem
                for old, new, stamp in zip(old_stem, new_stem, stamps):
                    p = os.path.join(path, old)
                    # The old names of all files sharing the stem
                    old_files = glob.glob(p + "*")
                    # The new (final) names of all files sharing the stem
                    new_files = [os.path.join(path, new + f[len(p):]) for f in old_files]
                    # Temporary names
                    tmp_path = os.path.join(path, "tmp")
                    tmp_files = [os.path.join(tmp_path, new + f[len(p):]) for f in old_files]

                    # Finally, rename the files, but move them into a temporary
                    # folder first to avoid clashes that could happen with
                    # certain sorting orders
                    if do_it:
                        os.mkdir(tmp_path)
                    else:
                        print('Creating directory "{0}"'.format(tmp_path), file=sys.stderr)

                    for old_f, tmp_f in zip(old_files, tmp_files):
                        if do_it:
                            os.rename(old_f, tmp_f)
                        else:
                            print('Renaming "{0}" to "{1}"'.format(old_f, tmp_f), file=sys.stderr)

                    for tmp_f, new_f in zip(tmp_files, new_files):
                        if do_it:
                            os.rename(tmp_f, new_f)
                        else:
                            print('Renaming "{0}" to "{1}"'.format(tmp_f, new_f), file=sys.stderr)

                    if do_it:
                        os.rmdir(tmp_path)
                    else:
                        print('Removing directory "{0}"'.format(tmp_path), file=sys.stderr)

                    # Record the file name extensions that are relevant
                    for f in new_files:
                        # Get the extension (without the dot)
                        ext = os.path.splitext(f)[1][1:]
                        if ext == 'sha1':
                            continue
                        if ext == 'md5':
                            continue

                        # FIXME: Extract the classifier from the filename
                        classifier = None

                        snapshotVersion = ET.SubElement(snapshotVersions, "snapshotVersion")
                        if classifier is not None:
                            ET.SubElement(snapshotVersion, "classifier").text = classifier
                        ET.SubElement(snapshotVersion, "extension").text = ext
                        ET.SubElement(snapshotVersion, "value").text = new[len(artifact_id) + 1:]
                        # The updated element holds the timestamp without the separating dot
                        ET.SubElement(snapshotVersion, "updated").text = stamp.replace('.', '')

                # Write out the version-specific maven-metadata.xml
                filename = os.path.join(path, "maven-metadata.xml")
                contents = minidom.parseString(ET.tostring(mt)).toprettyxml(indent=" ")
                if do_it:
                    out = open(filename, mode='w')
                else:
                    print('Creating file "{0}"'.format(filename), file=sys.stderr)
                    out = sys.stdout
                out.write(contents)
                if do_it:
                    out.close()
                    write_hashes(filename, contents)
            else:
                # Non-snapshot (release) versions are left untouched
                pass
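
# Example invocation (the script and repository names below are hypothetical;
# the script always operates on the current working directory):
#
#   cd /path/to/simple-maven-repository
#   ./update-maven-metadata.py           # dry run, only prints what would be done
#   ./update-maven-metadata.py --do-it   # actually rename files and write metadata plus hashes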