liboqs/scripts/update_docs_from_yaml.py
Jason Goertzen 769412623c
Adding multiple upstream support to doc generation (#1123)
* Fix an issue where run_tests tried to run tests from upstream. Fix by @DStebila

* No longer checking spdx on files copied from upstream

* Added compiler checking for gcc. Must be at least 9.4.0.

* Added PQClean's aarch64-optimized Kyber implementation to liboqs

* Stripped the asimd flag for ARM64_V8, as it isn't needed and causes errors when specified during compilation

* Updated Kyber docs

* Updated Kyber markdown so that no-secret-dependent-branching-checked-by-valgrind correctly says false for the new aarch64 implementation

* Added automated updating of the legacy YAML format, and added some YAML files which don't get automatically updated

* Fixed a bug where optimized upstreams would use the primary upstream's supported platform

* Fixed a bug with copy_from_upstream.py's verify function where it would always use the default upstream location when comparing with diff

* Added reporting for implementations that are in upstream but aren't yet integrated into liboqs

* Added support to ignore implementations from upstream in copy_from_upstream.yml. This prevents implementations that haven't yet been integrated into liboqs from being pulled in by copy_from_upstream.py, and it also silences the warning when verifying.

* Added updated YAML and Markdown files for docs

* copy_from_upstream.py now also calls update_docs_from_yaml.py
2021-11-18 06:56:19 +01:00

# SPDX-License-Identifier: MIT
import argparse
import sys
import glob
import tabulate
import yaml
import os
parser = argparse.ArgumentParser()
parser.add_argument("--liboqs-root", default=".")
args = parser.parse_args()
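# An illustrative invocation (assumes it is run from the liboqs repository
# root; --liboqs-root defaults to the current directory):
#   python3 scripts/update_docs_from_yaml.py --liboqs-root .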
def load_yaml(filename, encoding='utf-8'):
    with open(filename, mode='r', encoding=encoding) as fh:
        return yaml.safe_load(fh.read())

def file_get_contents(filename, encoding=None):
    with open(filename, mode='r', encoding=encoding) as fh:
        return fh.read()
kem_yamls = []
sig_yamls = []
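# The parsed per-algorithm YAML documents are accumulated in these lists so
# the README section at the bottom can reuse them without re-reading the files.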
########################################
# Update the KEM markdown documentation.
########################################
for kem_yaml_path in sorted(glob.glob(os.path.join(args.liboqs_root, 'docs', 'algorithms', 'kem', '*.yml'))):
    kem_yaml = load_yaml(kem_yaml_path)
    kem_yamls.append(kem_yaml)
    kem_name = os.path.splitext(os.path.basename(kem_yaml_path))[0]
    print('Updating {}/{}.md'.format(os.path.dirname(kem_yaml_path), kem_name))
    with open(os.path.join(args.liboqs_root, 'docs', 'algorithms', 'kem', '{}.md'.format(kem_name)), mode='w', encoding='utf-8') as out_md:
        out_md.write('# {}\n\n'.format(kem_yaml['name']))
        out_md.write('- **Algorithm type**: Key encapsulation mechanism.\n')
        out_md.write('- **Main cryptographic assumption**: {}.\n'.format(kem_yaml['crypto-assumption']))
        out_md.write('- **Principal submitters**: {}.\n'.format(', '.join(kem_yaml['principal-submitters'])))
        if 'auxiliary-submitters' in kem_yaml and kem_yaml['auxiliary-submitters']:
            out_md.write('- **Auxiliary submitters**: {}.\n'.format(', '.join(kem_yaml['auxiliary-submitters'])))
        out_md.write('- **Authors\' website**: {}\n'.format(kem_yaml['website']))
        out_md.write('- **Specification version**: {}.\n'.format(kem_yaml['spec-version']))
        out_md.write('- **Primary Source**<a name="primary-source"></a>:\n')
        out_md.write('  - **Source**: {}\n'.format(kem_yaml['primary-upstream']['source']))
        out_md.write('  - **Implementation license (SPDX-Identifier)**: {}\n'.format(kem_yaml['primary-upstream']['spdx-license-identifier']))
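        # Each optimized upstream gets a named HTML anchor so the per-parameter-set
        # implementation tables below can link back to its source entry.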
        if 'optimized-upstreams' in kem_yaml:
            out_md.write('- **Optimized Implementation sources**: {}\n'.format(kem_yaml['primary-upstream']['source']))
            for opt_upstream in kem_yaml['optimized-upstreams']:
                out_md.write('  - **{}**:<a name="{}"></a>\n'.format(opt_upstream, opt_upstream))
                out_md.write('    - **Source**: {}\n'.format(kem_yaml['optimized-upstreams'][opt_upstream]['source']))
                out_md.write('    - **Implementation license (SPDX-Identifier)**: {}\n'.format(kem_yaml['optimized-upstreams'][opt_upstream]['spdx-license-identifier']))
        if 'upstream-ancestors' in kem_yaml:
            out_md.write(', which takes it from:\n')
            for url in kem_yaml['upstream-ancestors'][:-1]:
                out_md.write('  - {}, which takes it from:\n'.format(url))
            out_md.write('  - {}\n'.format(kem_yaml['upstream-ancestors'][-1]))
        else:
            out_md.write('\n')
        out_md.write('\n## Parameter set summary\n\n')
        table = [['Parameter set',
                  'Security model',
                  'Claimed NIST Level',
                  'Public key size (bytes)',
                  'Secret key size (bytes)',
                  'Ciphertext size (bytes)',
                  'Shared secret size (bytes)']]
        for parameter_set in kem_yaml['parameter-sets']:
            table.append([parameter_set['name'],
                          parameter_set['claimed-security'],
                          parameter_set['claimed-nist-level'],
                          parameter_set['length-public-key'],
                          parameter_set['length-secret-key'],
                          parameter_set['length-ciphertext'],
                          parameter_set['length-shared-secret']])
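        # tabulate's "pipe" format emits GitHub-flavoured Markdown tables.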
        out_md.write(tabulate.tabulate(table, tablefmt="pipe", headers="firstrow", colalign=("center",)))
        out_md.write('\n')
        for index, parameter_set in enumerate(kem_yaml['parameter-sets']):
            out_md.write('\n## {} implementation characteristics\n\n'.format(parameter_set['name']))
            table_header = ['Implementation source',
                            'Identifier in upstream',
                            'Supported architecture(s)',
                            'Supported operating system(s)',
                            'CPU extension(s) used',
                            'No branching-on-secrets claimed?',
                            'No branching-on-secrets checked by valgrind?']
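            # Only the first parameter set's table header carries the '‡' marker;
            # the footnote explaining it is written once, after all the tables.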
            if index == 0:
                table_header.append('Large stack usage?‡')
            else:
                table_header.append('Large stack usage?')
            table = [table_header]
            for impl in parameter_set['implementations']:
                # TODO: automate linking this?
                # If all platforms are supported, assume this is the (non-optimized)
                # primary upstream implementation.
                if impl['supported-platforms'] == 'all':
                    table.append(['[Primary Source](#primary-source)',
                                  impl['upstream-id'].replace('_', '\_'),
                                  'All',
                                  'All',
                                  'None',
                                  impl['no-secret-dependent-branching-claimed'],
                                  impl['no-secret-dependent-branching-checked-by-valgrind'],
                                  impl['large-stack-usage']])
                else:
                    for platform in impl['supported-platforms']:
                        op_systems = ','.join(platform['operating_systems'])
                        if 'required_flags' in platform and platform['required_flags']:
                            flags = ','.join(flag.upper() for flag in platform['required_flags'])
                        else:
                            flags = 'None'
                        if impl['upstream'] == 'primary-upstream':
                            name = 'Primary Source'
                            anchor = 'primary-source'
                        else:
                            name = impl['upstream']
                            anchor = impl['upstream']
                        upstream_name = '[{}](#{})'.format(name, anchor)
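                        # '[name](#anchor)' links back to the source entry written
                        # near the top of this Markdown file.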
                        table.append([upstream_name,
                                      impl['upstream-id'].replace('_', '\_'),
                                      platform['architecture'].replace('_', '\_'),
                                      op_systems,
                                      flags,
                                      impl['no-secret-dependent-branching-claimed'],
                                      impl['no-secret-dependent-branching-checked-by-valgrind'],
                                      impl['large-stack-usage']])
            out_md.write(tabulate.tabulate(table, tablefmt="pipe", headers="firstrow", colalign=("center",)))
            out_md.write('\n')
            if 'implementations-switch-on-runtime-cpu-features' in parameter_set:
                out_md.write('\nAre implementations chosen based on runtime CPU feature detection? **{}**.\n'.format('Yes' if parameter_set['implementations-switch-on-runtime-cpu-features'] else 'No'))
            if index == 0:
                out_md.write('\n ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file.\n')
        out_md.write('\n## Explanation of Terms\n\n')
        out_md.write('- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.')
##############################################
# Update the signature markdown documentation.
##############################################
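# Note: unlike the KEM YAMLs above, the signature YAMLs still use the legacy
# single-'upstream' format (no primary/optimized upstream split), so this
# section is structured slightly differently.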
for sig_yaml_path in sorted(glob.glob(os.path.join(args.liboqs_root, 'docs', 'algorithms', 'sig', '*.yml'))):
    sig_yaml = load_yaml(sig_yaml_path)
    sig_yamls.append(sig_yaml)
    sig_name = os.path.splitext(os.path.basename(sig_yaml_path))[0]
    print('Updating {}/{}.md'.format(os.path.dirname(sig_yaml_path), sig_name))
    with open(os.path.join(args.liboqs_root, 'docs', 'algorithms', 'sig', '{}.md'.format(sig_name)), mode='w', encoding='utf-8') as out_md:
        out_md.write('# {}\n\n'.format(sig_yaml['name']))
        out_md.write('- **Algorithm type**: Digital signature scheme.\n')
        out_md.write('- **Main cryptographic assumption**: {}.\n'.format(sig_yaml['crypto-assumption']))
        out_md.write('- **Principal submitters**: {}.\n'.format(', '.join(sig_yaml['principal-submitters'])))
        if 'auxiliary-submitters' in sig_yaml and sig_yaml['auxiliary-submitters']:
            out_md.write('- **Auxiliary submitters**: {}.\n'.format(', '.join(sig_yaml['auxiliary-submitters'])))
        out_md.write('- **Authors\' website**: {}\n'.format(sig_yaml['website']))
        out_md.write('- **Specification version**: {}.\n'.format(sig_yaml['spec-version']))
        out_md.write('- **Implementation source**: {}'.format(sig_yaml['upstream']))
        if 'upstream-ancestors' in sig_yaml:
            out_md.write(', which takes it from:\n')
            for url in sig_yaml['upstream-ancestors'][:-1]:
                out_md.write('  - {}, which takes it from:\n'.format(url))
            out_md.write('  - {}\n'.format(sig_yaml['upstream-ancestors'][-1]))
        else:
            out_md.write('\n')
        out_md.write('- **Implementation license (SPDX-Identifier)**: {}.\n'.format(sig_yaml['spdx-license-identifier']))
        out_md.write('\n## Parameter set summary\n\n')
        table = [['Parameter set',
                  'Security model',
                  'Claimed NIST Level',
                  'Public key size (bytes)',
                  'Secret key size (bytes)',
                  'Signature size (bytes)']]
        for parameter_set in sig_yaml['parameter-sets']:
            table.append([parameter_set['name'].replace('_', '\_'),
                          parameter_set['claimed-security'],
                          parameter_set['claimed-nist-level'],
                          parameter_set['length-public-key'],
                          parameter_set['length-secret-key'],
                          parameter_set['length-signature']])
        out_md.write(tabulate.tabulate(table, tablefmt="pipe", headers="firstrow", colalign=("center",)))
        out_md.write('\n')
        for index, parameter_set in enumerate(sig_yaml['parameter-sets']):
            out_md.write('\n## {} implementation characteristics\n\n'.format(parameter_set['name'].replace('_', '\_')))
            table_header = ['Identifier in upstream',
                            'Supported architecture(s)',
                            'Supported operating system(s)',
                            'CPU extension(s) used',
                            'No branching-on-secrets claimed?',
                            'No branching-on-secrets checked by valgrind?']
            if index == 0:
                table_header.append('Large stack usage?‡')
            else:
                table_header.append('Large stack usage?')
            table = [table_header]
            for impl in parameter_set['implementations']:
                if impl['supported-platforms'] == 'all':
                    table.append([impl['upstream-id'].replace('_', '\_'),
                                  'All',
                                  'All',
                                  'None',
                                  impl['no-secret-dependent-branching-claimed'],
                                  impl['no-secret-dependent-branching-checked-by-valgrind'],
                                  impl['large-stack-usage']])
                else:
                    for platform in impl['supported-platforms']:
                        if 'operating_systems' in platform:
                            op_systems = ','.join(platform['operating_systems'])
                        else:
                            op_systems = 'All'
                        if 'required_flags' in platform:
                            flags = ','.join(flag.upper() for flag in platform['required_flags'])
                        else:
                            flags = 'None'
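                        # Unlike the KEM path above, a platform entry here may omit
                        # 'operating_systems' or 'required_flags' entirely, hence the guards.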
                        table.append([impl['upstream-id'].replace('_', '\_'),
                                      platform['architecture'].replace('_', '\_'),
                                      op_systems,
                                      flags,
                                      impl['no-secret-dependent-branching-claimed'],
                                      impl['no-secret-dependent-branching-checked-by-valgrind'],
                                      impl['large-stack-usage']])
            out_md.write(tabulate.tabulate(table, tablefmt="pipe", headers="firstrow", colalign=("center",)))
            out_md.write('\n')
            if 'implementations-switch-on-runtime-cpu-features' in parameter_set:
                out_md.write('\nAre implementations chosen based on runtime CPU feature detection? **{}**.\n'.format('Yes' if parameter_set['implementations-switch-on-runtime-cpu-features'] else 'No'))
            if index == 0:
                out_md.write('\n ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file.\n')
        out_md.write('\n## Explanation of Terms\n\n')
        out_md.write('- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.')
####################
# Update the README.
####################
print("Updating README.md")
readme_path = os.path.join(args.liboqs_root, 'README.md')
start_identifier_tmpl = '<!--- OQS_TEMPLATE_FRAGMENT_LIST_{}_START -->'
end_identifier_tmpl = '<!--- OQS_TEMPLATE_FRAGMENT_LIST_{}_END -->'
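# The README is regenerated only between a START/END marker pair; the
# hand-written prose outside the markers is left untouched, e.g.:
#   <!--- OQS_TEMPLATE_FRAGMENT_LIST_KEXS_START -->
#   ...generated algorithm list...
#   <!--- OQS_TEMPLATE_FRAGMENT_LIST_KEXS_END -->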
# KEMS
readme_contents = file_get_contents(readme_path)
identifier_start = start_identifier_tmpl.format('KEXS')
identifier_end = end_identifier_tmpl.format('KEXS')
preamble = readme_contents[:readme_contents.find(identifier_start)]
postamble = readme_contents[readme_contents.find(identifier_end):]
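# 'preamble' ends just before the START marker (which is re-emitted below);
# 'postamble' begins at the END marker. Parameter sets whose implementations
# have large stack usage are flagged with a dagger ('†') in the regenerated lists.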
with open(readme_path, mode='w', encoding='utf-8') as readme:
    readme.write(preamble + identifier_start + '\n')
    for kem_yaml in kem_yamls:
        parameter_sets = kem_yaml['parameter-sets']
        if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']):
            readme.write('- **{}**: {}†'.format(kem_yaml['name'], parameter_sets[0]['name']))
        else:
            readme.write('- **{}**: {}'.format(kem_yaml['name'], parameter_sets[0]['name']))
        for parameter_set in parameter_sets[1:]:
            if any(impl['large-stack-usage'] for impl in parameter_set['implementations']):
                readme.write(', {}†'.format(parameter_set['name']))
            else:
                readme.write(', {}'.format(parameter_set['name']))
        readme.write('\n')
    readme.write(postamble)
# Signatures
readme_contents = file_get_contents(readme_path)
identifier_start = start_identifier_tmpl.format('SIGS')
identifier_end = end_identifier_tmpl.format('SIGS')
preamble = readme_contents[:readme_contents.find(identifier_start)]
postamble = readme_contents[readme_contents.find(identifier_end):]
with open(readme_path, mode='w', encoding='utf-8') as readme:
    readme.write(preamble + identifier_start + '\n')
    for sig_yaml in sig_yamls[:-1]: # SPHINCS is last in this sorted list and requires special handling.
        parameter_sets = sig_yaml['parameter-sets']
        if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']):
            readme.write('- **{}**: {}†'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_', '\_')))
        else:
            readme.write('- **{}**: {}'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_', '\_')))
        for parameter_set in parameter_sets[1:]:
            if any(impl['large-stack-usage'] for impl in parameter_set['implementations']):
                readme.write(', {}†'.format(parameter_set['name'].replace('_', '\_')))
            else:
                readme.write(', {}'.format(parameter_set['name'].replace('_', '\_')))
        readme.write('\n')
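    # SPHINCS+ has many parameter sets, so emit one README bullet per hash
    # function family instead of a single very long line.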
    sphincs_yml = sig_yamls[-1]
    for hash_func in ['Haraka', 'SHA256', 'SHAKE256']:
        parameter_sets = [pset for pset in sphincs_yml['parameter-sets'] if hash_func in pset['name']]
        if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']):
            readme.write('- **SPHINCS+-{}**: {}†'.format(hash_func, parameter_sets[0]['name'].replace('_', '\_')))
        else:
            readme.write('- **SPHINCS+-{}**: {}'.format(hash_func, parameter_sets[0]['name'].replace('_', '\_')))
        for parameter_set in parameter_sets[1:]:
            if any(impl['large-stack-usage'] for impl in parameter_set['implementations']):
                readme.write(', {}†'.format(parameter_set['name'].replace('_', '\_')))
            else:
                readme.write(', {}'.format(parameter_set['name'].replace('_', '\_')))
        readme.write('\n')
    readme.write(postamble)