From 6d3d6490ebe693b0c798bfa2e43923043387baf2 Mon Sep 17 00:00:00 2001 From: Victor Romero Date: Tue, 5 Jan 2021 14:36:46 -0800 Subject: [vcpkg] Improve versioning files generators (#15172) * Remove port version splitting from x-history * Parallelize versions file generator * Use cpu_count()/2 to avoid crashes * Use generatePortVersionsDb.py output to generate baseline * Update scripts/generateBaseline.py Co-authored-by: Adam Johnson * rename generateBaseline function * Update toolsrc/src/vcpkg/commands.porthistory.cpp Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com> * Remove unused code Co-authored-by: Adam Johnson Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com> --- scripts/generateBaseline.py | 103 +++++++++++++++++++++++--------------- scripts/generatePortVersionsDb.py | 103 ++++++++++++++------------------------ 2 files changed, 100 insertions(+), 106 deletions(-) (limited to 'scripts') diff --git a/scripts/generateBaseline.py b/scripts/generateBaseline.py index 45c424a7d..55bea6ba2 100644 --- a/scripts/generateBaseline.py +++ b/scripts/generateBaseline.py @@ -1,50 +1,71 @@ import os -import json -import subprocess import sys +import json +import time + +from pathlib import Path + SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) +PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports') +VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions') + +def generate_baseline(): + start_time = time.time() -def generate_baseline(ports_path, output_filepath): + # Assume each directory in ${VCPKG_ROOT}/ports is a different port port_names = [item for item in os.listdir( - ports_path) if os.path.isdir(os.path.join(ports_path, item))] + PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))] port_names.sort() - total = len(port_names) - baseline_versions = {} - for counter, port_name in enumerate(port_names): - vcpkg_exe = os.path.join(SCRIPT_DIRECTORY, '../vcpkg') - print(f'[{counter + 
1}/{total}] Getting package info for {port_name}') - output = subprocess.run( - [vcpkg_exe, 'x-package-info', '--x-json', port_name], - capture_output=True, - encoding='utf-8') - - if output.returncode == 0: - package_info = json.loads(output.stdout) - port_info = package_info['results'][port_name] - - version = {} - for scheme in ['version-string', 'version-semver', 'version-date', 'version']: - if scheme in port_info: - version[scheme] = package_info['results'][port_name][scheme] - break - version['port-version'] = 0 - if 'port-version' in port_info: - version['port-version'] = port_info['port-version'] - baseline_versions[port_name] = version - else: - print(f'x-package-info --x-json {port_name} failed: ', output.stdout.strip(), file=sys.stderr) - - output = {} - output['default'] = baseline_versions - - with open(output_filepath, 'r') as output_file: - json.dump(baseline_versions, output_file) - sys.exit(0) - - -if __name__ == '__main__': - generate_baseline( - ports_path=f'{SCRIPT_DIRECTORY}/../ports', output_filepath='baseline.json') + baseline_entries = {} + total_count = len(port_names) + for i, port_name in enumerate(port_names, 1): + port_file_path = os.path.join( + VERSIONS_DB_DIRECTORY, f'{port_name[0]}-', f'{port_name}.json') + + if not os.path.exists(port_file_path): + print( + f'Error: No version file for {port_name}.\n', file=sys.stderr) + continue + sys.stderr.write( + f'\rProcessed {i}/{total_count} ({i/total_count:.2%})') + with open(port_file_path, 'r') as db_file: + try: + versions_object = json.load(db_file) + if versions_object['versions']: + last_version = versions_object['versions'][0] + version_obj = {} + if 'version' in last_version: + version_obj['version'] = last_version['version'] + elif 'version-date' in last_version: + version_obj['version-date'] = last_version['version-date'] + elif 'version-semver' in last_version: + version_obj['version-semver'] = last_version['version-semver'] + else: + version_obj['version-string'] = 
last_version['version-string'] + version_obj['port-version'] = last_version['port-version'] + baseline_entries[port_name] = version_obj + except json.JSONDecodeError as e: + print(f'Error: Decoding {port_file_path}\n{e}\n') + baseline_object = {} + baseline_object['default'] = baseline_entries + + os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True) + baseline_path = os.path.join(VERSIONS_DB_DIRECTORY, 'baseline.json') + with open(baseline_path, 'w') as baseline_file: + json.dump(baseline_object, baseline_file) + + elapsed_time = time.time() - start_time + print(f'\nElapsed time: {elapsed_time:.2f} seconds') + + +def main(): + if not os.path.exists(VERSIONS_DB_DIRECTORY): + print(f'Version DB files must exist before generating a baseline.\nRun: `python generatePortVersionsDB`\n') + generate_baseline() + + +if __name__ == "__main__": + main() diff --git a/scripts/generatePortVersionsDb.py b/scripts/generatePortVersionsDb.py index e3c338c64..3b7de6942 100644 --- a/scripts/generatePortVersionsDb.py +++ b/scripts/generatePortVersionsDb.py @@ -1,17 +1,19 @@ import os -import os.path import sys import subprocess import json import time import shutil -from subprocess import CalledProcessError -from json.decoder import JSONDecodeError +import multiprocessing + from pathlib import Path +MAX_PROCESSES = multiprocessing.cpu_count() SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) +PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports') +VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions') def get_current_git_ref(): @@ -24,91 +26,62 @@ def get_current_git_ref(): return None -def generate_port_versions_db(ports_path, db_path, revision): +def generate_port_versions_file(port_name): + containing_dir = os.path.join(VERSIONS_DB_DIRECTORY, f'{port_name[0]}-') + os.makedirs(containing_dir, exist_ok=True) + + output_file_path = os.path.join(containing_dir, f'{port_name}.json') + if not os.path.exists(output_file_path): + env = os.environ.copy() + 
env['GIT_OPTIONAL_LOCKS'] = '0' + output = subprocess.run( + [os.path.join(SCRIPT_DIRECTORY, '../vcpkg.exe'), + 'x-history', port_name, '--x-json', f'--output={output_file_path}'], + capture_output=True, encoding='utf-8', env=env) + if output.returncode != 0: + print(f'x-history {port_name} failed: ', + output.stdout.strip(), file=sys.stderr) + + +def generate_port_versions_db(revision): start_time = time.time() # Assume each directory in ${VCPKG_ROOT}/ports is a different port port_names = [item for item in os.listdir( - ports_path) if os.path.isdir(os.path.join(ports_path, item))] - port_names.sort() + PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))] total_count = len(port_names) - # Dictionary to collect the latest version of each port as baseline - baseline_objects = {} - baseline_objects['default'] = {} - - for counter, port_name in enumerate(port_names): - containing_dir = os.path.join(db_path, f'{port_name[0]}-') - os.makedirs(containing_dir, exist_ok=True) - - output_filepath = os.path.join(containing_dir, f'{port_name}.json') - if not os.path.exists(output_filepath): - output = subprocess.run( - [os.path.join(SCRIPT_DIRECTORY, '../vcpkg'), - 'x-history', port_name, '--x-json'], - capture_output=True, encoding='utf-8') - - if output.returncode == 0: - try: - versions_object = json.loads(output.stdout) - - # Put latest version in baseline dictionary - latest_version = versions_object["versions"][0] - baseline_objects['default'][port_name] = { - "version-string": latest_version["version-string"], - "port-version": latest_version["port-version"] - } - with open(output_filepath, 'w') as output_file: - json.dump(versions_object, output_file) - except JSONDecodeError: - print( - f'Malformed JSON from vcpkg x-history {port_name}: ', output.stdout.strip(), file=sys.stderr) - else: - print(f'x-history {port_name} failed: ', - output.stdout.strip(), file=sys.stderr) - - # This should be replaced by a progress bar - if counter > 0 and counter % 
100 == 0: - elapsed_time = time.time() - start_time - print( - f'Processed {counter} out of {total_count}. Elapsed time: {elapsed_time:.2f} seconds') - - # Generate baseline.json - baseline_file_path = os.path.join(db_path, 'baseline.json') - with open(baseline_file_path, 'w') as baseline_output_file: - json.dump(baseline_objects, baseline_output_file) + concurrency = MAX_PROCESSES / 2 + print(f'Running {concurrency:.0f} parallel processes') + process_pool = multiprocessing.Pool(MAX_PROCESSES) + for i, _ in enumerate(process_pool.imap_unordered(generate_port_versions_file, port_names), 1): + sys.stderr.write( + f'\rProcessed: {i}/{total_count} ({(i / total_count):.2%})') + process_pool.close() + process_pool.join() # Generate timestamp - rev_file = os.path.join(db_path, revision) + rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision) Path(rev_file).touch() elapsed_time = time.time() - start_time print( - f'Processed {total_count} total ports. Elapsed time: {elapsed_time:.2f} seconds') + f'\nElapsed time: {elapsed_time:.2f} seconds') -def main(ports_path, db_path): +def main(): revision = get_current_git_ref() if not revision: print('Couldn\'t fetch current Git revision', file=sys.stderr) sys.exit(1) - rev_file = os.path.join(db_path, revision) + rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision) if os.path.exists(rev_file): print(f'Database files already exist for commit {revision}') sys.exit(0) - if (os.path.exists(db_path)): - try: - shutil.rmtree(db_path) - except OSError as e: - print(f'Could not delete folder: {db_path}.\nError: {e.strerror}') - - generate_port_versions_db(ports_path=ports_path, - db_path=db_path, - revision=revision) + generate_port_versions_db(revision) if __name__ == "__main__": - main(ports_path=os.path.join(SCRIPT_DIRECTORY, '../ports'), - db_path=os.path.join(SCRIPT_DIRECTORY, '../port_versions')) + main() -- cgit v1.2.3