From c96cd64af0131094c01e7e5c28221002ae319b7d Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 24 Feb 2021 18:50:48 +0200 Subject: [PATCH 01/86] Add a refactored Updater.refresh() A proposal of a new Updater.refresh() implementation: - based on metadata API - no longer dependent on keydb/roledb - follows the TUF specification's client workflow Introduces a MetadataWrapper class with the goal of providing functionality which is at this point missing in metadata API. Signed-off-by: Teodora Sechkova --- tuf/client_rework/README.md | 9 + tuf/client_rework/__init__.py | 0 tuf/client_rework/metadata_wrapper.py | 186 ++++++++++ tuf/client_rework/updater_rework.py | 515 ++++++++++++++++++++++++++ 4 files changed, 710 insertions(+) create mode 100644 tuf/client_rework/README.md create mode 100644 tuf/client_rework/__init__.py create mode 100644 tuf/client_rework/metadata_wrapper.py create mode 100644 tuf/client_rework/updater_rework.py diff --git a/tuf/client_rework/README.md b/tuf/client_rework/README.md new file mode 100644 index 0000000000..aa05e534c8 --- /dev/null +++ b/tuf/client_rework/README.md @@ -0,0 +1,9 @@ +# updater.py +**updater.py** is intended as the only TUF module that software update +systems need to utilize for a low-level integration. It provides a single +class representing an updater that includes methods to download, install, and +verify metadata or target files in a secure manner. Importing +**tuf.client.updater** and instantiating its main class is all that is +required by the client prior to a TUF update request. The importation and +instantiation steps allow TUF to load all of the required metadata files +and set the repository mirror information. 
diff --git a/tuf/client_rework/__init__.py b/tuf/client_rework/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py new file mode 100644 index 0000000000..a25a278234 --- /dev/null +++ b/tuf/client_rework/metadata_wrapper.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Metadata wrapper +""" +import time + +from securesystemslib.keys import format_metadata_to_key +from tuf.api import metadata +import tuf.exceptions + + + + +class MetadataWrapper: + """Helper classes extending or adding missing + functionality to metadata API + """ + + def __init__(self, meta): + self._meta = meta + + @classmethod + def from_json_object(cls, tmp_file): + """Loads JSON-formatted TUF metadata from a file object. + """ + raw_data = tmp_file.read() + + from tuf.api.serialization.json import JSONDeserializer + deserializer = JSONDeserializer() + _meta = deserializer.deserialize(raw_data) + return cls(meta=_meta) + + + @classmethod + def from_json_file(cls, filename): + """Loads JSON-formatted TUF metadata from a file. + """ + _meta = metadata.Metadata.from_file(filename) + return cls(meta=_meta) + + @property + def signed(self): + """ + TODO + """ + return self._meta.signed + + @property + def version(self): + """ + TODO + """ + return self._meta.signed.version + + + def verify(self, keys, threshold): + """ + TODO + """ + verified = 0 + # 1.3. 
Check signatures + for key in keys: + self._meta.verify(key) + verified+=1 + + if verified < threshold: + raise tuf.exceptions.InsufficientKeysError + + + def persist(self, filename): + """ + TODO + """ + self._meta.to_file(filename) + + + def expires(self, reference_time=None): + """ + TODO + """ + if reference_time is None: + expires_timestamp = tuf.formats.datetime_to_unix_timestamp( + self._meta.signed.expires) + reference_time = int(time.time()) + + if expires_timestamp < reference_time: + raise tuf.exceptions.ExpiredMetadataError + + + + +class RootWrapper(MetadataWrapper): + """ + TODO + """ + def keys(self, role): + """ + TODO + """ + keys = [] + for keyid in self._meta.signed.roles[role]['keyids']: + key_metadata = self._meta.signed.keys[keyid] + key, _ = format_metadata_to_key(key_metadata) + keys.append(key) + + return keys + + + def threshold(self, role): + """ + TODO + """ + return self._meta.signed.roles[role]['threshold'] + + + +class TimestampWrapper(MetadataWrapper): + """ + TODO + """ + @property + def snapshot(self): + """ + TODO + """ + return self._meta.signed.meta['snapshot.json'] + + +class SnapshotWrapper(MetadataWrapper): + """ + TODO + """ + def role(self, name): + """ + TODO + """ + return self._meta.signed.meta[name + '.json'] + + + +class TargetsWrapper(MetadataWrapper): + """ + TODO + """ + @property + def targets(self): + """ + TODO + """ + return self._meta.signed.targets + + + @property + def delegations(self): + """ + TODO + """ + return self._meta.signed.delegations + + + def keys(self, role): + """ + TODO + """ + keys = [] + for delegation in self._meta.signed.delegations['roles']: + if delegation['name'] == role: + for keyid in delegation['keyids']: + key_metadata = self._meta.signed.delegations['keys'][keyid] + key, _ = format_metadata_to_key(key_metadata) + keys.append(key) + return keys + + + def threshold(self, role): + """ + TODO + """ + for delegation in self._meta.signed.delegations['roles']: + if delegation['name'] == 
role: + return delegation['threshold'] + + return None diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py new file mode 100644 index 0000000000..427cc5595e --- /dev/null +++ b/tuf/client_rework/updater_rework.py @@ -0,0 +1,515 @@ +# Copyright 2020, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client 1.0.0 draft + +TODO + +""" + +#Imports +import os +import logging +import fnmatch + +from typing import TextIO, BinaryIO, Dict, Optional + +import securesystemslib.exceptions +import securesystemslib.util + +import tuf.settings +import tuf.mirrors +import tuf.download +import tuf.exceptions +import tuf.formats + +from tuf.client.fetcher import FetcherInterface +from tuf.requests_fetcher import RequestsFetcher +from .metadata_wrapper import ( + RootWrapper, + SnapshotWrapper, + TimestampWrapper, + TargetsWrapper +) + +# Globals +logger = logging.getLogger(__name__) + +# Classes +class Updater: + """ + Provides a class that can download target files securely. + + Attributes: + metadata: + + repository_name: + + mirrors: + + fetcher: + + consistent_snapshot: + """ + + def __init__( + self, repository_name: str, + repository_mirrors: Dict, + fetcher: Optional[FetcherInterface]=None): + + self._repository_name = repository_name + self._mirrors = repository_mirrors + self._consistent_snapshot = False + self._metadata = {'root': {}, + 'timestamp': {}, + 'snapshot': {}, + 'targets': {}} + + if fetcher is None: + self._fetcher = RequestsFetcher() + else: + self._fetcher = fetcher + + + def refresh(self) -> None: + """ + This method downloads, verifies, and loads metadata for the top-level + roles in a specific order (root -> timestamp -> snapshot -> targets) + The expiration time for downloaded metadata is also verified. + + The metadata for delegated roles are not refreshed by this method, but + by the method that returns targetinfo (i.e., + get_one_valid_targetinfo()). 
+ + The refresh() method should be called by the client before any target + requests. + """ + + self._load_root() + self._load_timestamp() + self._load_snapshot() + self._load_targets('targets', 'root') + + + def get_one_valid_targetinfo(self, filename: str) -> Dict: + """ + Returns the target information for a specific file identified by its + file path. This target method also downloads the metadata of updated + targets. + """ + return self._preorder_depth_first_walk(filename) + + + def updated_targets(self, targets: Dict, + destination_directory: str) -> Dict: + """ + After the client has retrieved the target information for those targets + they are interested in updating, they would call this method to + determine which targets have changed from those saved locally on disk. + All the targets that have changed are returns in a list. From this + list, they can request a download by calling 'download_target()'. + """ + # Keep track of the target objects and filepaths of updated targets. + # Return 'updated_targets' and use 'updated_targetpaths' to avoid + # duplicates. + updated_targets = [] + updated_targetpaths = [] + + for target in targets: + # Prepend 'destination_directory' to the target's relative filepath + # (as stored in metadata.) Verify the hash of 'target_filepath' + # against each hash listed for its fileinfo. Note: join() discards + # 'destination_directory' if 'filepath' contains a leading path + # separator (i.e., is treated as an absolute path). + filepath = target['filepath'] + target_filepath = os.path.join(destination_directory, filepath) + + if target_filepath in updated_targetpaths: + continue + + # Try one of the algorithm/digest combos for a mismatch. We break + # as soon as we find a mismatch. 
+ for algorithm, digest in target['fileinfo']['hashes'].items(): + digest_object = None + try: + digest_object = securesystemslib.hash.digest_filename( + target_filepath, algorithm=algorithm) + + # This exception will occur if the target does not exist + # locally. + except securesystemslib.exceptions.StorageError: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + # The file does exist locally, check if its hash differs. + if digest_object.hexdigest() != digest: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + return updated_targets + + + def download_target(self, target: Dict, destination_directory: str): + """ + This method performs the actual download of the specified target. + The file is saved to the 'destination_directory' argument. + """ + + for temp_obj in self._mirror_target_download(target): + try: + self._verify_target_file(temp_obj, target) + # break? should we break after first successful download? 
+ except Exception as exception: + # TODO: do something with exceptions + raise + + filepath = os.path.join(destination_directory, target['filepath']) + securesystemslib.util.persist_temp_file(temp_obj, filepath) + + + + def _mirror_meta_download( + self, filename: str, upper_length: int) -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = tuf.mirrors.get_list_of_mirrors('meta', filename, + self._mirrors) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = tuf.download.unsafe_download( + file_mirror, + upper_length, + self._fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) + + + def _mirror_target_download(self, fileinfo: str) -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + # full_filename = _get_full_name(filename) + file_mirrors = tuf.mirrors.get_list_of_mirrors( + 'target', fileinfo['filepath'], self._mirrors) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = tuf.download.safe_download( + file_mirror, + fileinfo['fileinfo']['length'], + self._fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) + + + def _get_full_meta_name(self, + role: str, + extension: str ='.json', + version: int = None) -> str: + """ + Helper method returning full metadata file path given the role name + and file extension. + """ + if version is None: + filename = role + extension + else: + filename = str(version) + '.' 
+ role + extension + return os.path.join(tuf.settings.repositories_directory, + self._repository_name, 'metadata', 'current', filename) + + + def _get_relative_meta_name( + self, role: str, + extension: str ='.json', + version: int = None) -> str: + """ + Helper method returning full metadata file path given the role name + and file extension. + """ + if version is None: + filename = role + extension + else: + filename = str(version) + '.' + role + extension + return filename + + + def _load_root(self) -> None: + """ + If metadata file for 'root' role does not exist locally, download it + over a network, verify it and store it permanently. + """ + + # Load trusted root metadata + self._metadata['root'] = RootWrapper.from_json_file( + self._get_full_meta_name('root')) + + # Update the root role + # 1.1. Let N denote the version number of the trusted + # root metadata file. + lower_bound = self._metadata['root']._meta.signed.version + upper_bound = lower_bound + tuf.settings.MAX_NUMBER_ROOT_ROTATIONS + + verified_root = None + for next_version in range(lower_bound, upper_bound): + try: + mirror_download = self._mirror_meta_download( + self._get_relative_meta_name('root', version=next_version), + tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH) + + for temp_obj in mirror_download: + try: + verified_root = self._verify_root(temp_obj) + + except Exception as exception: + raise + + except tuf.exceptions.NoWorkingMirrorError as exception: + for mirror_error in exception.mirror_errors.values(): + if neither_403_nor_404(mirror_error): + temp_obj.close() + raise + + break + + # Check for a freeze attack. The latest known time MUST be lower + # than the expiration timestamp in the trusted root metadata file + try: + verified_root.expires() + except Exception: + temp_obj.close() + + # 1.9. If the timestamp and / or snapshot keys have been rotated, + # then delete the trusted timestamp and snapshot metadata files. 
+ if (self._metadata['root'].keys('timestamp') != + verified_root.keys('timestamp')): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name('timestamp')) + self._metadata['timestamp'] = {} + + if (self._metadata['root'].keys('snapshot') != + verified_root.keys('snapshot')): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name('snapshot')) + self._metadata['snapshot'] = {} + + self._metadata['root'] = verified_root + # Persist root metadata. The client MUST write the file to non-volatile + # storage as FILENAME.EXT (e.g. root.json). + self._metadata['root'].persist(self._get_full_meta_name('root')) + + # 1.10. Set whether consistent snapshots are used as per + # the trusted root metadata file + self._consistent_snapshot = \ + self._metadata['root'].signed.consistent_snapshot + temp_obj.close() + + + + + + def _load_timestamp(self) -> None: + # TODO Check if timestamp exists locally + for temp_obj in self._mirror_meta_download('timestamp.json', + tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH): + try: + verified_tampstamp = self._verify_timestamp(temp_obj) + # break? should we break after first successful download? + except Exception as exception: + # TODO: do something with exceptions + temp_obj.close() + raise + + self._metadata['timestamp'] = verified_tampstamp + # Persist root metadata. The client MUST write the file to + # non-volatile storage as FILENAME.EXT (e.g. root.json). 
+ self._metadata['timestamp'].persist( + self._get_full_meta_name('timestamp.json')) + + temp_obj.close() + + + + def _load_snapshot(self) -> None: + + try: + length = self._metadata['timestamp'].snapshot['length'] + except KeyError: + length = tuf.settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH + + if self._consistent_snapshot: + version = self._metadata['timestamp'].snapshot['version'] + else: + version = None + + #Check if exists locally + # self.loadLocal('snapshot', snapshotVerifier) + for temp_obj in self._mirror_meta_download('snapshot.json', length): + try: + verified_snapshot = self._verify_snapshot(temp_obj) + # break? should we break after first successful download? + except Exception as exception: + # TODO: do something with exceptions + temp_obj.close() + raise + + self._metadata['snapshot'] = verified_snapshot + # Persist root metadata. The client MUST write the file to + # non-volatile storage as FILENAME.EXT (e.g. root.json). + self._metadata['snapshot'].persist( + self._get_full_meta_name('snapshot.json')) + + temp_obj.close() + + + def _load_targets(self, targets_role: str, parent_role: str) -> None: + try: + length = self._metadata['snapshot'].role(targets_role)['length'] + except KeyError: + length = tuf.settings.DEFAULT_TARGETS_REQUIRED_LENGTH + + if self._consistent_snapshot: + version = self._metadata['snapshot'].role(targets_role)['version'] + else: + version = None + + + #Check if exists locally + # self.loadLocal('snapshot', targetsVerifier) + + for temp_obj in self._mirror_meta_download( + targets_role + '.json', length): + try: + verified_targets = self._verify_targets(temp_obj, + targets_role, parent_role) + # break? should we break after first successful download? + except Exception as exception: + # TODO: do something with exceptions + temp_obj.close() + raise + self._metadata[targets_role] = verified_targets + # Persist root metadata. The client MUST write the file to + # non-volatile storage as FILENAME.EXT (e.g. root.json). 
+ self._metadata[targets_role].persist( + self._get_full_meta_name(targets_role, extension='.json')) + + temp_obj.close() + + + + def _verify_root(self, temp_obj: TextIO) -> RootWrapper: + + intermediate_root = RootWrapper.from_json_object(temp_obj) + + # Check for an arbitrary software attack + trusted_root = self._metadata['root'] + intermediate_root.verify(trusted_root.keys('root'), + trusted_root.threshold('root')) + intermediate_root.verify(intermediate_root.keys('root'), + intermediate_root.threshold('root')) + + # Check for a rollback attack. + if intermediate_root.version < trusted_root.version: + temp_obj.close() + raise tuf.exceptions.ReplayedMetadataError( + 'root', intermediate_root.version(), trusted_root.version()) + # Note that the expiration of the new (intermediate) root metadata + # file does not matter yet, because we will check for it in step 1.8. + + return intermediate_root + + + def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: + intermediate_timestamp = TimestampWrapper.from_json_object(temp_obj) + + # Check for an arbitrary software attack + trusted_root = self._metadata['root'] + intermediate_timestamp.verify( + trusted_root.keys('timestamp'), + trusted_root.threshold('timestamp')) + + # Check for a rollback attack. 
+ if self._metadata['timestamp']: + if (intermediate_timestamp.signed.version <= + self._metadata['timestamp'].version): + temp_obj.close() + raise tuf.exceptions.ReplayedMetadataError( + 'root', intermediate_timestamp.version(), + self._metadata['timestamp'].version()) + + if self._metadata['snapshot']: + if (intermediate_timestamp.snapshot.version <= + self._metadata['timestamp'].snapshot['version']): + temp_obj.close() + raise tuf.exceptions.ReplayedMetadataError( + 'root', intermediate_timestamp.snapshot.version(), + self._metadata['snapshot'].version()) + + intermediate_timestamp.expires() + + return intermediate_timestamp + + + + def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: + + # Check against timestamp metadata + if self._metadata['timestamp'].snapshot.get('hash'): + _check_hashes(temp_obj, + self._metadata['timestamp'].snapshot.get('hash')) + + intermediate_snapshot = SnapshotWrapper.from_json_object(temp_obj) + + if (intermediate_snapshot.version != + self._metadata['timestamp'].snapshot['version']): + temp_obj.close() + raise tuf.exceptions.BadVersionNumberError + + # Check for an arbitrary software attack + trusted_root = self._metadata['root'] + intermediate_snapshot.verify(trusted_root.keys('snapshot'), + trusted_root.threshold('snapshot')) + + # Check for a rollback attack + if self._metadata['snapshot']: + for target_role in intermediate_snapshot.signed.meta: + if (target_role['version'] != + self._metadata['snapshot'].meta[target_role]['version']): + temp_obj.close() + raise tuf.exceptions.BadVersionNumberError + + intermediate_snapshot.expires() + + return intermediate_snapshot + + + + +def neither_403_nor_404(mirror_error): + if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if mirror_error.status_code in {403, 404}: + return False + return True From 1e52ccd4a968744fe08c536abaeccd3d96f6cf53 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 24 Feb 2021 19:06:33 +0200 Subject: [PATCH 02/86] Add targets 
download functionality to the new Updater Mostly a transfer of the current client code related to the actual target files download. Needs to be further reworked. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 271 ++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 427cc5595e..8c69398ee6 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -506,7 +506,278 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: return intermediate_snapshot + def _verify_targets(self, + temp_obj: TextIO, filename: str, parent_role: str) -> TargetsWrapper: + # Check against timestamp metadata + if self._metadata['snapshot'].role(filename).get('hash'): + _check_hashes(temp_obj, + self._metadata['snapshot'].targets.get('hash')) + + intermediate_targets = TargetsWrapper.from_json_object(temp_obj) + if (intermediate_targets.version != + self._metadata['snapshot'].role(filename)['version']): + temp_obj.close() + raise tuf.exceptions.BadVersionNumberError + + # Check for an arbitrary software attack + parent_role = self._metadata[parent_role] + + intermediate_targets.verify(parent_role.keys(filename), + parent_role.threshold(filename)) + + intermediate_targets.expires() + + return intermediate_targets + + + + def _verify_target_file(self, + temp_obj: BinaryIO, targetinfo: Dict) -> None: + + _check_file_length(temp_obj, targetinfo['fileinfo']['length']) + _check_hashes(temp_obj, targetinfo['fileinfo']['hashes']) + + + + def _preorder_depth_first_walk(self, target_filepath) -> Dict: + + target = None + role_names = [('targets', 'root')] + visited_role_names = set() + number_of_delegations = tuf.settings.MAX_NUMBER_OF_DELEGATIONS + + # Ensure the client has the most up-to-date version of 'targets.json'. 
+ # Raise 'tuf.exceptions.NoWorkingMirrorError' if the changed metadata + # cannot be successfully downloaded and + # 'tuf.exceptions.RepositoryError' if the referenced metadata is + # missing. Target methods such as this one are called after the + # top-level metadata have been refreshed (i.e., updater.refresh()). + # self._update_metadata_if_changed('targets') + + # Preorder depth-first traversal of the graph of target delegations. + while (target is None and + number_of_delegations > 0 and + len(role_names) > 0): + + # Pop the role name from the top of the stack. + role_name, parent_role = role_names.pop(-1) + self._load_targets(role_name, parent_role) + # Skip any visited current role to prevent cycles. + if (role_name, parent_role) in visited_role_names: + logger.debug(f"Skipping visited current role {role_name}") + continue + + # The metadata for 'role_name' must be downloaded/updated before + # its targets, delegations, and child roles can be inspected. + # self._metadata['current'][role_name] is currently missing. + # _refresh_targets_metadata() does not refresh 'targets.json', it + # expects _update_metadata_if_changed() to have already refreshed + # it, which this function has checked above. + # self._refresh_targets_metadata(role_name, + # refresh_all_delegated_roles=False) + + role_metadata = self._metadata[role_name] + targets = role_metadata.targets + target = targets.get(target_filepath) + + # After preorder check, add current role to set of visited roles. + visited_role_names.add((role_name, parent_role)) + + # And also decrement number of visited roles. + number_of_delegations -= 1 + delegations = role_metadata.delegations + child_roles = delegations.get('roles', []) + + if target is None: + + child_roles_to_visit = [] + # NOTE: This may be a slow operation if there are many + # delegated roles. 
+ for child_role in child_roles: + child_role_name = _visit_child_role( + child_role, target_filepath) + + if (child_role['terminating'] and + child_role_name is not None): + logger.debug('Adding child role ' + + repr(child_role_name)) + logger.debug('Not backtracking to other roles.') + role_names = [] + child_roles_to_visit.append( + (child_role_name, role_name)) + break + + if child_role_name is None: + logger.debug('Skipping child role ' + + repr(child_role_name)) + + else: + logger.debug('Adding child role ' + + repr(child_role_name)) + child_roles_to_visit.append( + (child_role_name, role_name)) + + # Push 'child_roles_to_visit' in reverse order of appearance + # onto 'role_names'. Roles are popped from the end of + # the 'role_names' list. + child_roles_to_visit.reverse() + role_names.extend(child_roles_to_visit) + + else: + logger.debug('Found target in current role ' + + repr(role_name)) + + if (target is None and + number_of_delegations == 0 and + len(role_names) > 0): + logger.debug(repr(len(role_names)) + ' roles left to visit, ' + + 'but allowed to visit at most ' + + repr(tuf.settings.MAX_NUMBER_OF_DELEGATIONS) + ' delegations.') + + return {'filepath': target_filepath, 'fileinfo': target} + + + + + + +def _visit_child_role(child_role: Dict, target_filepath: str) -> str: + """ + + Non-public method that determines whether the given 'target_filepath' + is an allowed path of 'child_role'. + + Ensure that we explore only delegated roles trusted with the target. The + metadata for 'child_role' should have been refreshed prior to this point, + however, the paths/targets that 'child_role' signs for have not been + verified (as intended). The paths/targets that 'child_role' is allowed + to specify in its metadata depends on the delegating role, and thus is + left to the caller to verify. We verify here that 'target_filepath' + is an allowed path according to the delegated 'child_role'. 
+ + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? + + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefixes, keys, and so on. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. + + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + + Otherwise, we return None. + """ + + child_role_name = child_role['name'] + child_role_paths = child_role.get('paths') + child_role_path_hash_prefixes = child_role.get('path_hash_prefixes') + + if child_role_path_hash_prefixes is not None: + target_filepath_hash = _get_target_hash(target_filepath) + for child_role_path_hash_prefix in child_role_path_hash_prefixes: + if not target_filepath_hash.startswith(child_role_path_hash_prefix): + continue + + return child_role_name + + elif child_role_paths is not None: + # Is 'child_role_name' allowed to sign for 'target_filepath'? + for child_role_path in child_role_paths: + # A child role path may be an explicit path or glob pattern (Unix + # shell-style wildcards). The child role 'child_role_name' is + # returned if 'target_filepath' is equal to or matches + # 'child_role_path'. Explicit filepaths are also considered + # matches. A repo maintainer might delegate a glob pattern with a + # leading path separator, while the client requests a matching + # target without a leading path separator - make sure to strip any + # leading path separators so that a match is made. + # Example: "foo.tgz" should match with "/*.tgz". 
+ if fnmatch.fnmatch(target_filepath.lstrip(os.sep), + child_role_path.lstrip(os.sep)): + logger.debug('Child role ' + repr(child_role_name) + + ' is allowed to sign for ' + repr(target_filepath)) + + return child_role_name + + logger.debug( + 'The given target path ' + repr(target_filepath) + + ' does not match the trusted path or glob pattern: ' + + repr(child_role_path)) + continue + + else: + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefixes' fields should not be missing, + # so we raise a format error here in case they are both missing. + raise tuf.exceptions.FormatError(repr(child_role_name) + ' ' + 'has neither a "paths" nor "path_hash_prefixes". At least' + ' one of these attributes must be present.') + + return None + + +def _check_file_length(file_object, trusted_file_length): + + file_object.seek(0, 2) + observed_length = file_object.tell() + + # Return and log a message if the length 'file_object' is equal to + # 'trusted_file_length', otherwise raise an exception. A hard check + # ensures that a downloaded file strictly matches a known, or trusted, + # file length. + if observed_length != trusted_file_length: + raise tuf.exceptions.DownloadLengthMismatchError(trusted_file_length, + observed_length) + + +def _check_hashes(file_object, trusted_hashes): + + # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply + # return. + for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = securesystemslib.hash.digest(algorithm) + # Ensure we read from the beginning of the file object + # TODO: should we store file position (before the loop) and reset + # after we seek about? + file_object.seek(0) + digest_object.update(file_object.read()) + computed_hash = digest_object.hexdigest() + + # Raise an exception if any of the hashes are incorrect. 
+ if trusted_hash != computed_hash: + raise securesystemslib.exceptions.BadHashError(trusted_hash, + computed_hash) + + logger.info('The file\'s ' + algorithm + ' hash is' + ' correct: ' + trusted_hash) + + + +def _get_target_hash(target_filepath, hash_function='sha256'): + + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository (i.e., repository + # tool) uses 'hash_function' to generate hashes and UTF-8. + digest_object = securesystemslib.hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode('utf-8') + digest_object.update(encoded_target_filepath) + target_filepath_hash = digest_object.hexdigest() + + return target_filepath_hash def neither_403_nor_404(mirror_error): if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): From f666a59bd9b58c6bc6287391c99ec9c4ad247851 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 24 Feb 2021 19:10:28 +0200 Subject: [PATCH 03/86] Add tests and linter config to the new Updater Adds a basic test case for Updater. Applies the linter config used in api/metadata.py to all files under client_rework. 
Signed-off-by: Teodora Sechkova --- tests/test_updater_rework.py | 248 +++++++++++++++++++++++++++++++++++ tox.ini | 5 + tuf/client_rework/pylintrc | 6 + 3 files changed, 259 insertions(+) create mode 100644 tests/test_updater_rework.py create mode 100644 tuf/client_rework/pylintrc diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py new file mode 100644 index 0000000000..bc6ce3a3f1 --- /dev/null +++ b/tests/test_updater_rework.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python + +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Test Updater class +""" + +import os +import time +import shutil +import copy +import tempfile +import logging +import errno +import sys +import unittest +import json +import tracemalloc + +if sys.version_info >= (3, 3): + import unittest.mock as mock +else: + import mock + +import tuf +import tuf.exceptions +import tuf.log +import tuf.repository_tool as repo_tool +import tuf.unittest_toolbox as unittest_toolbox +import tuf.client_rework.updater_rework as updater + +from tests import utils +from tuf.api import metadata + +import securesystemslib + +logger = logging.getLogger(__name__) + + +class TestUpdater(unittest_toolbox.Modified_TestCase): + + @classmethod + def setUpClass(cls): + # Create a temporary directory to store the repository, metadata, and target + # files. 'temporary_directory' must be deleted in TearDownModule() so that + # temporary files are always removed, even when exceptions occur. + cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd()) + + # Needed because in some tests simple_server.py cannot be found. + # The reason is that the current working directory + # has been changed when executing a subprocess. + cls.SIMPLE_SERVER_PATH = os.path.join(os.getcwd(), 'simple_server.py') + + # Launch a SimpleHTTPServer (serves files in the current directory). 
+ # Test cases will request metadata and target files that have been + # pre-generated in 'tuf/tests/repository_data', which will be served + # by the SimpleHTTPServer launched here. The test cases of 'test_updater.py' + # assume the pre-generated metadata files have a specific structure, such + # as a delegated role 'targets/role1', three target files, five key files, + # etc. + cls.server_process_handler = utils.TestServerProcess(log=logger, + server=cls.SIMPLE_SERVER_PATH) + + + + @classmethod + def tearDownClass(cls): + # Cleans the resources and flush the logged lines (if any). + cls.server_process_handler.clean() + + # Remove the temporary repository directory, which should contain all the + # metadata, targets, and key files generated for the test cases + shutil.rmtree(cls.temporary_directory) + + + + def setUp(self): + # We are inheriting from custom class. + unittest_toolbox.Modified_TestCase.setUp(self) + + self.repository_name = 'test_repository1' + + # Copy the original repository files provided in the test folder so that + # any modifications made to repository files are restricted to the copies. + # The 'repository_data' directory is expected to exist in 'tuf.tests/'. + original_repository_files = os.path.join(os.getcwd(), 'repository_data') + temporary_repository_root = \ + self.make_temp_directory(directory=self.temporary_directory) + + # The original repository, keystore, and client directories will be copied + # for each test case. + original_repository = os.path.join(original_repository_files, 'repository') + original_keystore = os.path.join(original_repository_files, 'keystore') + original_client = os.path.join(original_repository_files, 'client') + + # Save references to the often-needed client repository directories. + # Test cases need these references to access metadata and target files. 
+ self.repository_directory = \ + os.path.join(temporary_repository_root, 'repository') + self.keystore_directory = \ + os.path.join(temporary_repository_root, 'keystore') + + self.client_directory = os.path.join(temporary_repository_root, + 'client') + self.client_metadata = os.path.join(self.client_directory, + self.repository_name, 'metadata') + self.client_metadata_current = os.path.join(self.client_metadata, + 'current') + + # Copy the original 'repository', 'client', and 'keystore' directories + # to the temporary repository the test cases can use. + shutil.copytree(original_repository, self.repository_directory) + shutil.copytree(original_client, self.client_directory) + shutil.copytree(original_keystore, self.keystore_directory) + + # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. + repository_basepath = self.repository_directory[len(os.getcwd()):] + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + + str(self.server_process_handler.port) + repository_basepath + + # Setting 'tuf.settings.repositories_directory' with the temporary client + # directory copied from the original repository files. + tuf.settings.repositories_directory = self.client_directory + + self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, + 'metadata_path': 'metadata', + 'targets_path': 'targets'}} + + # Creating a repository instance. The test cases will use this client + # updater to refresh metadata, fetch target files, etc. + self.repository_updater = updater.Updater(self.repository_name, + self.repository_mirrors) + + # Metadata role keys are needed by the test cases to make changes to the + # repository (e.g., adding a new target file to 'targets.json' and then + # requesting a refresh()). + self.role_keys = _load_role_keys(self.keystore_directory) + + + + def tearDown(self): + # We are inheriting from custom class. + unittest_toolbox.Modified_TestCase.tearDown(self) + + # Logs stdout and stderr from the server subprocess.
+ self.server_process_handler.flush_log() + + + + # UNIT TESTS. + def test_refresh(self): + + self.repository_updater.refresh() + + for role in ['root', 'timestamp', 'snapshot', 'targets']: + metadata_obj = metadata.Metadata.from_file(os.path.join( + self.client_metadata_current, role + '.json')) + + metadata_obj_2 = metadata.Metadata.from_file(os.path.join( + self.repository_directory, 'metadata', role + '.json')) + + + self.assertDictEqual(metadata_obj.to_dict(), + metadata_obj_2.to_dict()) + + # Get targetinfo for 'file1.txt' listed in targets + targetinfo1 = self.repository_updater.get_one_valid_targetinfo('file1.txt') + # Get targetinfo for 'file3.txt' listed in the delegated role1 + targetinfo3= self.repository_updater.get_one_valid_targetinfo('file3.txt') + + destination_directory = self.make_temp_directory() + updated_targets = self.repository_updater.updated_targets([targetinfo1, targetinfo3], + destination_directory) + + self.assertListEqual(updated_targets, [targetinfo1, targetinfo3]) + + self.repository_updater.download_target(targetinfo1, destination_directory) + updated_targets = self.repository_updater.updated_targets(updated_targets, + destination_directory) + + self.assertListEqual(updated_targets, [targetinfo3]) + + + self.repository_updater.download_target(targetinfo3, destination_directory) + updated_targets = self.repository_updater.updated_targets(updated_targets, + destination_directory) + + self.assertListEqual(updated_targets, []) + + +def _load_role_keys(keystore_directory): + + # Populating 'self.role_keys' by importing the required public and private + # keys of 'tuf/tests/repository_data/'. The role keys are needed when + # modifying the remote repository used by the test cases in this unit test. + + # The pre-generated key files in 'repository_data/keystore' are all encrypted with + # a 'password' passphrase. 
+ EXPECTED_KEYFILE_PASSWORD = 'password' + + # Store and return the cryptography keys of the top-level roles, including 1 + # delegated role. + role_keys = {} + + root_key_file = os.path.join(keystore_directory, 'root_key') + targets_key_file = os.path.join(keystore_directory, 'targets_key') + snapshot_key_file = os.path.join(keystore_directory, 'snapshot_key') + timestamp_key_file = os.path.join(keystore_directory, 'timestamp_key') + delegation_key_file = os.path.join(keystore_directory, 'delegation_key') + + role_keys = {'root': {}, 'targets': {}, 'snapshot': {}, 'timestamp': {}, + 'role1': {}} + + # Import the top-level and delegated role public keys. + role_keys['root']['public'] = \ + repo_tool.import_rsa_publickey_from_file(root_key_file+'.pub') + role_keys['targets']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(targets_key_file+'.pub') + role_keys['snapshot']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(snapshot_key_file+'.pub') + role_keys['timestamp']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(timestamp_key_file+'.pub') + role_keys['role1']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(delegation_key_file+'.pub') + + # Import the private keys of the top-level and delegated roles. 
+ role_keys['root']['private'] = \ + repo_tool.import_rsa_privatekey_from_file(root_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['targets']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(targets_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['snapshot']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(snapshot_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['timestamp']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(timestamp_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['role1']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(delegation_key_file, + EXPECTED_KEYFILE_PASSWORD) + + return role_keys + +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() diff --git a/tox.ini b/tox.ini index 9cdafa6e58..27d4b46534 100644 --- a/tox.ini +++ b/tox.ini @@ -51,3 +51,8 @@ commands = pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization bandit -r {toxinidir}/tuf + +[testenv:lint-client] +commands = + pylint {toxinidir}/tuf/client_rework --rcfile={toxinidir}/tuf/client_rework/pylintrc + bandit -r {toxinidir}/tuf diff --git a/tuf/client_rework/pylintrc b/tuf/client_rework/pylintrc new file mode 100644 index 0000000000..a75347f446 --- /dev/null +++ b/tuf/client_rework/pylintrc @@ -0,0 +1,6 @@ +[MESSAGE_CONTROL] +disable=fixme + +[FORMAT] +indent-string=" " +max-line-length=79 From 1fa7412e87d0a73ca1446601169a8477a6792a9a Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 18 Mar 2021 17:52:05 +0200 Subject: [PATCH 04/86] Temporarily reduce code coverage to 90 percent Coverage failures may hide other failing tests in the CI. Configure coverage to fail under 90 percent during the ongoing experimental-client development.
Signed-off-by: Teodora Sechkova --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 27d4b46534..d87964de14 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ changedir = tests commands = python --version coverage run aggregate_tests.py - coverage report -m --fail-under 97 + coverage report -m --fail-under 90 deps = -r{toxinidir}/requirements-test.txt From 8abe49abb4646d3b32f34dabbf98ede13eeee0bf Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 18 Mar 2021 18:11:20 +0200 Subject: [PATCH 05/86] Add .gitattributes file For compatibility with Windows systems, declare repository_data files to always have LF line endings on checkout. A trailing "/**" matches everything inside, with infinite depth. Signed-off-by: Teodora Sechkova --- .gitattributes | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..66709ac428 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Files that will always have LF line endings on checkout. +tests/repository_data/** text eol=lf + From c4b5eb5e6b48f74bd85b90375b60fc0cfe382b40 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 20:15:38 +0200 Subject: [PATCH 06/86] Update the tox lint environment Apply the updated api/pylintrc config to the client_rework directory. 
Signed-off-by: Teodora Sechkova --- tox.ini | 8 ++------ tuf/client_rework/pylintrc | 6 ------ 2 files changed, 2 insertions(+), 12 deletions(-) delete mode 100644 tuf/client_rework/pylintrc diff --git a/tox.ini b/tox.ini index d87964de14..8036eada7e 100644 --- a/tox.ini +++ b/tox.ini @@ -45,14 +45,10 @@ commands = black --check --diff --line-length 80 {toxinidir}/tuf/api isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/api pylint {toxinidir}/tuf/api --rcfile={toxinidir}/tuf/api/pylintrc + pylint {toxinidir}/tuf/client_rework --rcfile={toxinidir}/tuf/api/pylintrc # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. - pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization + pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization,{toxinidir}/tuf/client_rework bandit -r {toxinidir}/tuf - -[testenv:lint-client] -commands = - pylint {toxinidir}/tuf/client_rework --rcfile={toxinidir}/tuf/client_rework/pylintrc - bandit -r {toxinidir}/tuf diff --git a/tuf/client_rework/pylintrc b/tuf/client_rework/pylintrc deleted file mode 100644 index a75347f446..0000000000 --- a/tuf/client_rework/pylintrc +++ /dev/null @@ -1,6 +0,0 @@ -[MESSAGE_CONTROL] -disable=fixme - -[FORMAT] -indent-string=" " -max-line-length=79 From 5edb58c537d04839c1ae122a3145287b396f6327 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 20:21:12 +0200 Subject: [PATCH 07/86] Apply black and isort over the refactored client Run manually the black and isort code formatters over the client_rework code. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_wrapper.py | 55 ++- tuf/client_rework/updater_rework.py | 461 ++++++++++++++------------ 2 files changed, 270 insertions(+), 246 deletions(-) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index a25a278234..89b4a1da1e 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -8,10 +8,9 @@ import time from securesystemslib.keys import format_metadata_to_key -from tuf.api import metadata -import tuf.exceptions - +import tuf.exceptions +from tuf.api import metadata class MetadataWrapper: @@ -24,20 +23,18 @@ def __init__(self, meta): @classmethod def from_json_object(cls, tmp_file): - """Loads JSON-formatted TUF metadata from a file object. - """ + """Loads JSON-formatted TUF metadata from a file object.""" raw_data = tmp_file.read() from tuf.api.serialization.json import JSONDeserializer + deserializer = JSONDeserializer() _meta = deserializer.deserialize(raw_data) return cls(meta=_meta) - @classmethod def from_json_file(cls, filename): - """Loads JSON-formatted TUF metadata from a file. - """ + """Loads JSON-formatted TUF metadata from a file.""" _meta = metadata.Metadata.from_file(filename) return cls(meta=_meta) @@ -55,7 +52,6 @@ def version(self): """ return self._meta.signed.version - def verify(self, keys, threshold): """ TODO @@ -64,87 +60,85 @@ def verify(self, keys, threshold): # 1.3. 
Check signatures for key in keys: self._meta.verify(key) - verified+=1 + verified += 1 if verified < threshold: raise tuf.exceptions.InsufficientKeysError - def persist(self, filename): """ TODO """ self._meta.to_file(filename) - def expires(self, reference_time=None): """ TODO """ if reference_time is None: expires_timestamp = tuf.formats.datetime_to_unix_timestamp( - self._meta.signed.expires) + self._meta.signed.expires + ) reference_time = int(time.time()) if expires_timestamp < reference_time: raise tuf.exceptions.ExpiredMetadataError - - class RootWrapper(MetadataWrapper): """ TODO """ + def keys(self, role): """ TODO """ keys = [] - for keyid in self._meta.signed.roles[role]['keyids']: + for keyid in self._meta.signed.roles[role]["keyids"]: key_metadata = self._meta.signed.keys[keyid] key, _ = format_metadata_to_key(key_metadata) keys.append(key) return keys - def threshold(self, role): """ TODO """ - return self._meta.signed.roles[role]['threshold'] - + return self._meta.signed.roles[role]["threshold"] class TimestampWrapper(MetadataWrapper): """ TODO """ + @property def snapshot(self): """ TODO """ - return self._meta.signed.meta['snapshot.json'] + return self._meta.signed.meta["snapshot.json"] class SnapshotWrapper(MetadataWrapper): """ TODO """ + def role(self, name): """ TODO """ - return self._meta.signed.meta[name + '.json'] - + return self._meta.signed.meta[name + ".json"] class TargetsWrapper(MetadataWrapper): """ TODO """ + @property def targets(self): """ @@ -152,7 +146,6 @@ def targets(self): """ return self._meta.signed.targets - @property def delegations(self): """ @@ -160,27 +153,25 @@ def delegations(self): """ return self._meta.signed.delegations - def keys(self, role): """ TODO """ keys = [] - for delegation in self._meta.signed.delegations['roles']: - if delegation['name'] == role: - for keyid in delegation['keyids']: - key_metadata = self._meta.signed.delegations['keys'][keyid] + for delegation in self._meta.signed.delegations["roles"]: + 
if delegation["name"] == role: + for keyid in delegation["keyids"]: + key_metadata = self._meta.signed.delegations["keys"][keyid] key, _ = format_metadata_to_key(key_metadata) keys.append(key) return keys - def threshold(self, role): """ TODO """ - for delegation in self._meta.signed.delegations['roles']: - if delegation['name'] == role: - return delegation['threshold'] + for delegation in self._meta.signed.delegations["roles"]: + if delegation["name"] == role: + return delegation["threshold"] return None diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 8c69398ee6..9e04c20475 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -7,29 +7,27 @@ """ -#Imports -import os -import logging import fnmatch - -from typing import TextIO, BinaryIO, Dict, Optional +import logging +import os +from typing import BinaryIO, Dict, Optional, TextIO import securesystemslib.exceptions import securesystemslib.util -import tuf.settings -import tuf.mirrors import tuf.download import tuf.exceptions import tuf.formats - +import tuf.mirrors +import tuf.settings from tuf.client.fetcher import FetcherInterface from tuf.requests_fetcher import RequestsFetcher + from .metadata_wrapper import ( RootWrapper, SnapshotWrapper, + TargetsWrapper, TimestampWrapper, - TargetsWrapper ) # Globals @@ -53,24 +51,27 @@ class Updater: """ def __init__( - self, repository_name: str, - repository_mirrors: Dict, - fetcher: Optional[FetcherInterface]=None): + self, + repository_name: str, + repository_mirrors: Dict, + fetcher: Optional[FetcherInterface] = None, + ): self._repository_name = repository_name self._mirrors = repository_mirrors self._consistent_snapshot = False - self._metadata = {'root': {}, - 'timestamp': {}, - 'snapshot': {}, - 'targets': {}} + self._metadata = { + "root": {}, + "timestamp": {}, + "snapshot": {}, + "targets": {}, + } if fetcher is None: self._fetcher = RequestsFetcher() else: self._fetcher = 
fetcher - def refresh(self) -> None: """ This method downloads, verifies, and loads metadata for the top-level @@ -88,8 +89,7 @@ def refresh(self) -> None: self._load_root() self._load_timestamp() self._load_snapshot() - self._load_targets('targets', 'root') - + self._load_targets("targets", "root") def get_one_valid_targetinfo(self, filename: str) -> Dict: """ @@ -99,9 +99,9 @@ def get_one_valid_targetinfo(self, filename: str) -> Dict: """ return self._preorder_depth_first_walk(filename) - - def updated_targets(self, targets: Dict, - destination_directory: str) -> Dict: + def updated_targets( + self, targets: Dict, destination_directory: str + ) -> Dict: """ After the client has retrieved the target information for those targets they are interested in updating, they would call this method to @@ -121,7 +121,7 @@ def updated_targets(self, targets: Dict, # against each hash listed for its fileinfo. Note: join() discards # 'destination_directory' if 'filepath' contains a leading path # separator (i.e., is treated as an absolute path). - filepath = target['filepath'] + filepath = target["filepath"] target_filepath = os.path.join(destination_directory, filepath) if target_filepath in updated_targetpaths: @@ -129,11 +129,12 @@ def updated_targets(self, targets: Dict, # Try one of the algorithm/digest combos for a mismatch. We break # as soon as we find a mismatch. - for algorithm, digest in target['fileinfo']['hashes'].items(): + for algorithm, digest in target["fileinfo"]["hashes"].items(): digest_object = None try: digest_object = securesystemslib.hash.digest_filename( - target_filepath, algorithm=algorithm) + target_filepath, algorithm=algorithm + ) # This exception will occur if the target does not exist # locally. @@ -150,7 +151,6 @@ def updated_targets(self, targets: Dict, return updated_targets - def download_target(self, target: Dict, destination_directory: str): """ This method performs the actual download of the specified target. 
@@ -165,26 +165,23 @@ def download_target(self, target: Dict, destination_directory: str): # TODO: do something with exceptions raise - filepath = os.path.join(destination_directory, target['filepath']) + filepath = os.path.join(destination_directory, target["filepath"]) securesystemslib.util.persist_temp_file(temp_obj, filepath) - - - def _mirror_meta_download( - self, filename: str, upper_length: int) -> TextIO: + def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: """ Download metadata file from the list of metadata mirrors """ - file_mirrors = tuf.mirrors.get_list_of_mirrors('meta', filename, - self._mirrors) + file_mirrors = tuf.mirrors.get_list_of_mirrors( + "meta", filename, self._mirrors + ) file_mirror_errors = {} for file_mirror in file_mirrors: try: temp_obj = tuf.download.unsafe_download( - file_mirror, - upper_length, - self._fetcher) + file_mirror, upper_length, self._fetcher + ) temp_obj.seek(0) yield temp_obj @@ -195,8 +192,8 @@ def _mirror_meta_download( finally: if file_mirror_errors: raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) - + file_mirror_errors + ) def _mirror_target_download(self, fileinfo: str) -> BinaryIO: """ @@ -204,15 +201,15 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: """ # full_filename = _get_full_name(filename) file_mirrors = tuf.mirrors.get_list_of_mirrors( - 'target', fileinfo['filepath'], self._mirrors) + "target", fileinfo["filepath"], self._mirrors + ) file_mirror_errors = {} for file_mirror in file_mirrors: try: temp_obj = tuf.download.safe_download( - file_mirror, - fileinfo['fileinfo']['length'], - self._fetcher) + file_mirror, fileinfo["fileinfo"]["length"], self._fetcher + ) temp_obj.seek(0) yield temp_obj @@ -223,13 +220,12 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: finally: if file_mirror_errors: raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) + file_mirror_errors + ) - - def _get_full_meta_name(self, - role: str, - 
extension: str ='.json', - version: int = None) -> str: + def _get_full_meta_name( + self, role: str, extension: str = ".json", version: int = None + ) -> str: """ Helper method returning full metadata file path given the role name and file extension. @@ -237,15 +233,18 @@ def _get_full_meta_name(self, if version is None: filename = role + extension else: - filename = str(version) + '.' + role + extension - return os.path.join(tuf.settings.repositories_directory, - self._repository_name, 'metadata', 'current', filename) - + filename = str(version) + "." + role + extension + return os.path.join( + tuf.settings.repositories_directory, + self._repository_name, + "metadata", + "current", + filename, + ) def _get_relative_meta_name( - self, role: str, - extension: str ='.json', - version: int = None) -> str: + self, role: str, extension: str = ".json", version: int = None + ) -> str: """ Helper method returning full metadata file path given the role name and file extension. @@ -253,32 +252,33 @@ def _get_relative_meta_name( if version is None: filename = role + extension else: - filename = str(version) + '.' + role + extension + filename = str(version) + "." + role + extension return filename - - def _load_root(self) -> None: + def _load_root(self) -> None: """ If metadata file for 'root' role does not exist locally, download it over a network, verify it and store it permanently. """ # Load trusted root metadata - self._metadata['root'] = RootWrapper.from_json_file( - self._get_full_meta_name('root')) + self._metadata["root"] = RootWrapper.from_json_file( + self._get_full_meta_name("root") + ) # Update the root role # 1.1. Let N denote the version number of the trusted # root metadata file. 
- lower_bound = self._metadata['root']._meta.signed.version + lower_bound = self._metadata["root"]._meta.signed.version upper_bound = lower_bound + tuf.settings.MAX_NUMBER_ROOT_ROTATIONS verified_root = None for next_version in range(lower_bound, upper_bound): try: mirror_download = self._mirror_meta_download( - self._get_relative_meta_name('root', version=next_version), - tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH) + self._get_relative_meta_name("root", version=next_version), + tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, + ) for temp_obj in mirror_download: try: @@ -304,37 +304,37 @@ def _load_root(self) -> None: # 1.9. If the timestamp and / or snapshot keys have been rotated, # then delete the trusted timestamp and snapshot metadata files. - if (self._metadata['root'].keys('timestamp') != - verified_root.keys('timestamp')): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name('timestamp')) - self._metadata['timestamp'] = {} - - if (self._metadata['root'].keys('snapshot') != - verified_root.keys('snapshot')): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name('snapshot')) - self._metadata['snapshot'] = {} - - self._metadata['root'] = verified_root + if self._metadata["root"].keys("timestamp") != verified_root.keys( + "timestamp" + ): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name("timestamp")) + self._metadata["timestamp"] = {} + + if self._metadata["root"].keys("snapshot") != verified_root.keys( + "snapshot" + ): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name("snapshot")) + self._metadata["snapshot"] = {} + + self._metadata["root"] = verified_root # Persist root metadata. The client MUST write the file to non-volatile # storage as FILENAME.EXT (e.g. root.json). - self._metadata['root'].persist(self._get_full_meta_name('root')) + self._metadata["root"].persist(self._get_full_meta_name("root")) # 1.10. 
Set whether consistent snapshots are used as per # the trusted root metadata file - self._consistent_snapshot = \ - self._metadata['root'].signed.consistent_snapshot + self._consistent_snapshot = self._metadata[ + "root" + ].signed.consistent_snapshot temp_obj.close() - - - - def _load_timestamp(self) -> None: # TODO Check if timestamp exists locally - for temp_obj in self._mirror_meta_download('timestamp.json', - tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH): + for temp_obj in self._mirror_meta_download( + "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH + ): try: verified_tampstamp = self._verify_timestamp(temp_obj) # break? should we break after first successful download? @@ -343,31 +343,30 @@ def _load_timestamp(self) -> None: temp_obj.close() raise - self._metadata['timestamp'] = verified_tampstamp + self._metadata["timestamp"] = verified_tampstamp # Persist root metadata. The client MUST write the file to # non-volatile storage as FILENAME.EXT (e.g. root.json). - self._metadata['timestamp'].persist( - self._get_full_meta_name('timestamp.json')) + self._metadata["timestamp"].persist( + self._get_full_meta_name("timestamp.json") + ) temp_obj.close() - - def _load_snapshot(self) -> None: try: - length = self._metadata['timestamp'].snapshot['length'] + length = self._metadata["timestamp"].snapshot["length"] except KeyError: length = tuf.settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH if self._consistent_snapshot: - version = self._metadata['timestamp'].snapshot['version'] + version = self._metadata["timestamp"].snapshot["version"] else: version = None - #Check if exists locally + # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in self._mirror_meta_download('snapshot.json', length): + for temp_obj in self._mirror_meta_download("snapshot.json", length): try: verified_snapshot = self._verify_snapshot(temp_obj) # break? should we break after first successful download? 
@@ -376,35 +375,36 @@ def _load_snapshot(self) -> None: temp_obj.close() raise - self._metadata['snapshot'] = verified_snapshot + self._metadata["snapshot"] = verified_snapshot # Persist root metadata. The client MUST write the file to # non-volatile storage as FILENAME.EXT (e.g. root.json). - self._metadata['snapshot'].persist( - self._get_full_meta_name('snapshot.json')) + self._metadata["snapshot"].persist( + self._get_full_meta_name("snapshot.json") + ) temp_obj.close() - def _load_targets(self, targets_role: str, parent_role: str) -> None: try: - length = self._metadata['snapshot'].role(targets_role)['length'] + length = self._metadata["snapshot"].role(targets_role)["length"] except KeyError: length = tuf.settings.DEFAULT_TARGETS_REQUIRED_LENGTH if self._consistent_snapshot: - version = self._metadata['snapshot'].role(targets_role)['version'] + version = self._metadata["snapshot"].role(targets_role)["version"] else: version = None - - #Check if exists locally + # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) for temp_obj in self._mirror_meta_download( - targets_role + '.json', length): + targets_role + ".json", length + ): try: - verified_targets = self._verify_targets(temp_obj, - targets_role, parent_role) + verified_targets = self._verify_targets( + temp_obj, targets_role, parent_role + ) # break? should we break after first successful download? except Exception as exception: # TODO: do something with exceptions @@ -414,90 +414,103 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Persist root metadata. The client MUST write the file to # non-volatile storage as FILENAME.EXT (e.g. root.json). 
self._metadata[targets_role].persist( - self._get_full_meta_name(targets_role, extension='.json')) + self._get_full_meta_name(targets_role, extension=".json") + ) temp_obj.close() - - def _verify_root(self, temp_obj: TextIO) -> RootWrapper: intermediate_root = RootWrapper.from_json_object(temp_obj) # Check for an arbitrary software attack - trusted_root = self._metadata['root'] - intermediate_root.verify(trusted_root.keys('root'), - trusted_root.threshold('root')) - intermediate_root.verify(intermediate_root.keys('root'), - intermediate_root.threshold('root')) + trusted_root = self._metadata["root"] + intermediate_root.verify( + trusted_root.keys("root"), trusted_root.threshold("root") + ) + intermediate_root.verify( + intermediate_root.keys("root"), intermediate_root.threshold("root") + ) # Check for a rollback attack. if intermediate_root.version < trusted_root.version: temp_obj.close() raise tuf.exceptions.ReplayedMetadataError( - 'root', intermediate_root.version(), trusted_root.version()) + "root", intermediate_root.version(), trusted_root.version() + ) # Note that the expiration of the new (intermediate) root metadata # file does not matter yet, because we will check for it in step 1.8. return intermediate_root - def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: intermediate_timestamp = TimestampWrapper.from_json_object(temp_obj) # Check for an arbitrary software attack - trusted_root = self._metadata['root'] + trusted_root = self._metadata["root"] intermediate_timestamp.verify( - trusted_root.keys('timestamp'), - trusted_root.threshold('timestamp')) + trusted_root.keys("timestamp"), trusted_root.threshold("timestamp") + ) # Check for a rollback attack. 
- if self._metadata['timestamp']: - if (intermediate_timestamp.signed.version <= - self._metadata['timestamp'].version): + if self._metadata["timestamp"]: + if ( + intermediate_timestamp.signed.version + <= self._metadata["timestamp"].version + ): temp_obj.close() raise tuf.exceptions.ReplayedMetadataError( - 'root', intermediate_timestamp.version(), - self._metadata['timestamp'].version()) - - if self._metadata['snapshot']: - if (intermediate_timestamp.snapshot.version <= - self._metadata['timestamp'].snapshot['version']): + "root", + intermediate_timestamp.version(), + self._metadata["timestamp"].version(), + ) + + if self._metadata["snapshot"]: + if ( + intermediate_timestamp.snapshot.version + <= self._metadata["timestamp"].snapshot["version"] + ): temp_obj.close() raise tuf.exceptions.ReplayedMetadataError( - 'root', intermediate_timestamp.snapshot.version(), - self._metadata['snapshot'].version()) + "root", + intermediate_timestamp.snapshot.version(), + self._metadata["snapshot"].version(), + ) intermediate_timestamp.expires() return intermediate_timestamp - - def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: - # Check against timestamp metadata - if self._metadata['timestamp'].snapshot.get('hash'): - _check_hashes(temp_obj, - self._metadata['timestamp'].snapshot.get('hash')) + # Check against timestamp metadata + if self._metadata["timestamp"].snapshot.get("hash"): + _check_hashes( + temp_obj, self._metadata["timestamp"].snapshot.get("hash") + ) intermediate_snapshot = SnapshotWrapper.from_json_object(temp_obj) - if (intermediate_snapshot.version != - self._metadata['timestamp'].snapshot['version']): + if ( + intermediate_snapshot.version + != self._metadata["timestamp"].snapshot["version"] + ): temp_obj.close() raise tuf.exceptions.BadVersionNumberError # Check for an arbitrary software attack - trusted_root = self._metadata['root'] - intermediate_snapshot.verify(trusted_root.keys('snapshot'), - trusted_root.threshold('snapshot')) + 
trusted_root = self._metadata["root"] + intermediate_snapshot.verify( + trusted_root.keys("snapshot"), trusted_root.threshold("snapshot") + ) # Check for a rollback attack - if self._metadata['snapshot']: + if self._metadata["snapshot"]: for target_role in intermediate_snapshot.signed.meta: - if (target_role['version'] != - self._metadata['snapshot'].meta[target_role]['version']): + if ( + target_role["version"] + != self._metadata["snapshot"].meta[target_role]["version"] + ): temp_obj.close() raise tuf.exceptions.BadVersionNumberError @@ -505,45 +518,44 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: return intermediate_snapshot + def _verify_targets( + self, temp_obj: TextIO, filename: str, parent_role: str + ) -> TargetsWrapper: - def _verify_targets(self, - temp_obj: TextIO, filename: str, parent_role: str) -> TargetsWrapper: - - # Check against timestamp metadata - if self._metadata['snapshot'].role(filename).get('hash'): - _check_hashes(temp_obj, - self._metadata['snapshot'].targets.get('hash')) + # Check against timestamp metadata + if self._metadata["snapshot"].role(filename).get("hash"): + _check_hashes( + temp_obj, self._metadata["snapshot"].targets.get("hash") + ) intermediate_targets = TargetsWrapper.from_json_object(temp_obj) - if (intermediate_targets.version != - self._metadata['snapshot'].role(filename)['version']): + if ( + intermediate_targets.version + != self._metadata["snapshot"].role(filename)["version"] + ): temp_obj.close() raise tuf.exceptions.BadVersionNumberError # Check for an arbitrary software attack parent_role = self._metadata[parent_role] - intermediate_targets.verify(parent_role.keys(filename), - parent_role.threshold(filename)) + intermediate_targets.verify( + parent_role.keys(filename), parent_role.threshold(filename) + ) intermediate_targets.expires() return intermediate_targets + def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: - - def _verify_target_file(self, - temp_obj: 
BinaryIO, targetinfo: Dict) -> None: - - _check_file_length(temp_obj, targetinfo['fileinfo']['length']) - _check_hashes(temp_obj, targetinfo['fileinfo']['hashes']) - - + _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) + _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) def _preorder_depth_first_walk(self, target_filepath) -> Dict: target = None - role_names = [('targets', 'root')] + role_names = [("targets", "root")] visited_role_names = set() number_of_delegations = tuf.settings.MAX_NUMBER_OF_DELEGATIONS @@ -556,9 +568,9 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # self._update_metadata_if_changed('targets') # Preorder depth-first traversal of the graph of target delegations. - while (target is None and - number_of_delegations > 0 and - len(role_names) > 0): + while ( + target is None and number_of_delegations > 0 and len(role_names) > 0 + ): # Pop the role name from the top of the stack. role_name, parent_role = role_names.pop(-1) @@ -587,7 +599,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # And also decrement number of visited roles. number_of_delegations -= 1 delegations = role_metadata.delegations - child_roles = delegations.get('roles', []) + child_roles = delegations.get("roles", []) if target is None: @@ -596,27 +608,35 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # delegated roles. 
for child_role in child_roles: child_role_name = _visit_child_role( - child_role, target_filepath) - - if (child_role['terminating'] and - child_role_name is not None): - logger.debug('Adding child role ' + - repr(child_role_name)) - logger.debug('Not backtracking to other roles.') + child_role, target_filepath + ) + + if ( + child_role["terminating"] + and child_role_name is not None + ): + logger.debug( + "Adding child role " + repr(child_role_name) + ) + logger.debug("Not backtracking to other roles.") role_names = [] child_roles_to_visit.append( - (child_role_name, role_name)) + (child_role_name, role_name) + ) break if child_role_name is None: - logger.debug('Skipping child role ' + - repr(child_role_name)) + logger.debug( + "Skipping child role " + repr(child_role_name) + ) else: - logger.debug('Adding child role ' + - repr(child_role_name)) + logger.debug( + "Adding child role " + repr(child_role_name) + ) child_roles_to_visit.append( - (child_role_name, role_name)) + (child_role_name, role_name) + ) # Push 'child_roles_to_visit' in reverse order of appearance # onto 'role_names'. Roles are popped from the end of @@ -625,21 +645,22 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: role_names.extend(child_roles_to_visit) else: - logger.debug('Found target in current role ' + - repr(role_name)) - - if (target is None and - number_of_delegations == 0 and - len(role_names) > 0): - logger.debug(repr(len(role_names)) + ' roles left to visit, ' + - 'but allowed to visit at most ' + - repr(tuf.settings.MAX_NUMBER_OF_DELEGATIONS) + ' delegations.') - - return {'filepath': target_filepath, 'fileinfo': target} - - + logger.debug("Found target in current role " + repr(role_name)) + if ( + target is None + and number_of_delegations == 0 + and len(role_names) > 0 + ): + logger.debug( + repr(len(role_names)) + + " roles left to visit, " + + "but allowed to visit at most " + + repr(tuf.settings.MAX_NUMBER_OF_DELEGATIONS) + + " delegations." 
+ ) + return {"filepath": target_filepath, "fileinfo": target} def _visit_child_role(child_role: Dict, target_filepath: str) -> str: @@ -682,9 +703,9 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: Otherwise, we return None. """ - child_role_name = child_role['name'] - child_role_paths = child_role.get('paths') - child_role_path_hash_prefixes = child_role.get('path_hash_prefixes') + child_role_name = child_role["name"] + child_role_paths = child_role.get("paths") + child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") if child_role_path_hash_prefixes is not None: target_filepath_hash = _get_target_hash(target_filepath) @@ -706,26 +727,35 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: # target without a leading path separator - make sure to strip any # leading path separators so that a match is made. # Example: "foo.tgz" should match with "/*.tgz". - if fnmatch.fnmatch(target_filepath.lstrip(os.sep), - child_role_path.lstrip(os.sep)): - logger.debug('Child role ' + repr(child_role_name) + - ' is allowed to sign for ' + repr(target_filepath)) + if fnmatch.fnmatch( + target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) + ): + logger.debug( + "Child role " + + repr(child_role_name) + + " is allowed to sign for " + + repr(target_filepath) + ) return child_role_name logger.debug( - 'The given target path ' + repr(target_filepath) + - ' does not match the trusted path or glob pattern: ' + - repr(child_role_path)) + "The given target path " + + repr(target_filepath) + + " does not match the trusted path or glob pattern: " + + repr(child_role_path) + ) continue else: # 'role_name' should have been validated when it was downloaded. # The 'paths' or 'path_hash_prefixes' fields should not be missing, # so we raise a format error here in case they are both missing. 
- raise tuf.exceptions.FormatError(repr(child_role_name) + ' ' + raise tuf.exceptions.FormatError( + repr(child_role_name) + " " 'has neither a "paths" nor "path_hash_prefixes". At least' - ' one of these attributes must be present.') + " one of these attributes must be present." + ) return None @@ -740,8 +770,9 @@ def _check_file_length(file_object, trusted_file_length): # ensures that a downloaded file strictly matches a known, or trusted, # file length. if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError(trusted_file_length, - observed_length) + raise tuf.exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) def _check_hashes(file_object, trusted_hashes): @@ -759,26 +790,28 @@ def _check_hashes(file_object, trusted_hashes): # Raise an exception if any of the hashes are incorrect. if trusted_hash != computed_hash: - raise securesystemslib.exceptions.BadHashError(trusted_hash, - computed_hash) - - logger.info('The file\'s ' + algorithm + ' hash is' - ' correct: ' + trusted_hash) + raise securesystemslib.exceptions.BadHashError( + trusted_hash, computed_hash + ) + logger.info( + "The file's " + algorithm + " hash is" " correct: " + trusted_hash + ) -def _get_target_hash(target_filepath, hash_function='sha256'): +def _get_target_hash(target_filepath, hash_function="sha256"): # Calculate the hash of the filepath to determine which bin to find the # target. The client currently assumes the repository (i.e., repository # tool) uses 'hash_function' to generate hashes and UTF-8. 
digest_object = securesystemslib.hash.digest(hash_function) - encoded_target_filepath = target_filepath.encode('utf-8') + encoded_target_filepath = target_filepath.encode("utf-8") digest_object.update(encoded_target_filepath) target_filepath_hash = digest_object.hexdigest() return target_filepath_hash + def neither_403_nor_404(mirror_error): if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): if mirror_error.status_code in {403, 404}: From 2d48cf2d51c28be6ca35bb7c69f7180fe6b410e5 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 20:44:37 +0200 Subject: [PATCH 08/86] Configure tox to run black over the new client code Configure tox to run black and isort over the files under client_rework directory. Signed-off-by: Teodora Sechkova --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 8036eada7e..3d6c230dd1 100644 --- a/tox.ini +++ b/tox.ini @@ -43,7 +43,9 @@ commands = # Use different configs for new (tuf/api/*) and legacy code # TODO: configure black and isort args in pyproject.toml (see #1161) black --check --diff --line-length 80 {toxinidir}/tuf/api + black --check --diff --line-length 80 {toxinidir}/tuf/client_rework isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/api + isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/client_rework pylint {toxinidir}/tuf/api --rcfile={toxinidir}/tuf/api/pylintrc pylint {toxinidir}/tuf/client_rework --rcfile={toxinidir}/tuf/api/pylintrc From ca1ff90889d4ba38ef59fc4232f85b6f4984e41f Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 22:53:46 +0200 Subject: [PATCH 09/86] Fix various pylint issues Fix linter issues after applying the api/pylintrc config over the client_rework/* code. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_wrapper.py | 15 +-- tuf/client_rework/updater_rework.py | 152 ++++++++++++++++---------- 2 files changed, 102 insertions(+), 65 deletions(-) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index 89b4a1da1e..6f182dc336 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -25,18 +25,19 @@ def __init__(self, meta): def from_json_object(cls, tmp_file): """Loads JSON-formatted TUF metadata from a file object.""" raw_data = tmp_file.read() - + # Use local scope import to avoid circular import errors + # pylint: disable=import-outside-toplevel from tuf.api.serialization.json import JSONDeserializer deserializer = JSONDeserializer() - _meta = deserializer.deserialize(raw_data) - return cls(meta=_meta) + meta = deserializer.deserialize(raw_data) + return cls(meta=meta) @classmethod def from_json_file(cls, filename): """Loads JSON-formatted TUF metadata from a file.""" - _meta = metadata.Metadata.from_file(filename) - return cls(meta=_meta) + meta = metadata.Metadata.from_file(filename) + return cls(meta=meta) @property def signed(self): @@ -97,7 +98,7 @@ def keys(self, role): keys = [] for keyid in self._meta.signed.roles[role]["keyids"]: key_metadata = self._meta.signed.keys[keyid] - key, _ = format_metadata_to_key(key_metadata) + key, dummy = format_metadata_to_key(key_metadata) keys.append(key) return keys @@ -162,7 +163,7 @@ def keys(self, role): if delegation["name"] == role: for keyid in delegation["keyids"]: key_metadata = self._meta.signed.delegations["keys"][keyid] - key, _ = format_metadata_to_key(key_metadata) + key, dummy = format_metadata_to_key(key_metadata) keys.append(key) return keys diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 9e04c20475..2d9dfcf5b1 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -60,12 +60,7 @@ def 
__init__( self._repository_name = repository_name self._mirrors = repository_mirrors self._consistent_snapshot = False - self._metadata = { - "root": {}, - "timestamp": {}, - "snapshot": {}, - "targets": {}, - } + self._metadata = {} if fetcher is None: self._fetcher = RequestsFetcher() @@ -99,9 +94,8 @@ def get_one_valid_targetinfo(self, filename: str) -> Dict: """ return self._preorder_depth_first_walk(filename) - def updated_targets( - self, targets: Dict, destination_directory: str - ) -> Dict: + @staticmethod + def updated_targets(targets: Dict, destination_directory: str) -> Dict: """ After the client has retrieved the target information for those targets they are interested in updating, they would call this method to @@ -157,16 +151,18 @@ def download_target(self, target: Dict, destination_directory: str): The file is saved to the 'destination_directory' argument. """ - for temp_obj in self._mirror_target_download(target): - try: + try: + for temp_obj in self._mirror_target_download(target): self._verify_target_file(temp_obj, target) # break? should we break after first successful download? - except Exception as exception: - # TODO: do something with exceptions - raise - filepath = os.path.join(destination_directory, target["filepath"]) - securesystemslib.util.persist_temp_file(temp_obj, filepath) + filepath = os.path.join( + destination_directory, target["filepath"] + ) + securesystemslib.util.persist_temp_file(temp_obj, filepath) + except Exception: + # TODO: do something with exceptions + raise def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: """ @@ -242,8 +238,9 @@ def _get_full_meta_name( filename, ) + @staticmethod def _get_relative_meta_name( - self, role: str, extension: str = ".json", version: int = None + role: str, extension: str = ".json", version: int = None ) -> str: """ Helper method returning full metadata file path given the role name @@ -269,7 +266,7 @@ def _load_root(self) -> None: # Update the root role # 1.1. 
Let N denote the version number of the trusted # root metadata file. - lower_bound = self._metadata["root"]._meta.signed.version + lower_bound = self._metadata["root"].version upper_bound = lower_bound + tuf.settings.MAX_NUMBER_ROOT_ROTATIONS verified_root = None @@ -284,7 +281,7 @@ def _load_root(self) -> None: try: verified_root = self._verify_root(temp_obj) - except Exception as exception: + except Exception: raise except tuf.exceptions.NoWorkingMirrorError as exception: @@ -299,7 +296,7 @@ def _load_root(self) -> None: # than the expiration timestamp in the trusted root metadata file try: verified_root.expires() - except Exception: + except tuf.exceptions.ExpiredMetadataError: temp_obj.close() # 1.9. If the timestamp and / or snapshot keys have been rotated, @@ -328,9 +325,13 @@ def _load_root(self) -> None: self._consistent_snapshot = self._metadata[ "root" ].signed.consistent_snapshot + temp_obj.close() def _load_timestamp(self) -> None: + """ + TODO + """ # TODO Check if timestamp exists locally for temp_obj in self._mirror_meta_download( "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH @@ -338,7 +339,7 @@ def _load_timestamp(self) -> None: try: verified_tampstamp = self._verify_timestamp(temp_obj) # break? should we break after first successful download? 
- except Exception as exception: + except Exception: # TODO: do something with exceptions temp_obj.close() raise @@ -353,16 +354,19 @@ def _load_timestamp(self) -> None: temp_obj.close() def _load_snapshot(self) -> None: - + """ + TODO + """ try: length = self._metadata["timestamp"].snapshot["length"] except KeyError: length = tuf.settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH - if self._consistent_snapshot: - version = self._metadata["timestamp"].snapshot["version"] - else: - version = None + # Uncomment when implementing consistent_snapshot + # if self._consistent_snapshot: + # version = self._metadata["timestamp"].snapshot["version"] + # else: + # version = None # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) @@ -370,7 +374,7 @@ def _load_snapshot(self) -> None: try: verified_snapshot = self._verify_snapshot(temp_obj) # break? should we break after first successful download? - except Exception as exception: + except Exception: # TODO: do something with exceptions temp_obj.close() raise @@ -385,15 +389,19 @@ def _load_snapshot(self) -> None: temp_obj.close() def _load_targets(self, targets_role: str, parent_role: str) -> None: + """ + TODO + """ try: length = self._metadata["snapshot"].role(targets_role)["length"] except KeyError: length = tuf.settings.DEFAULT_TARGETS_REQUIRED_LENGTH - if self._consistent_snapshot: - version = self._metadata["snapshot"].role(targets_role)["version"] - else: - version = None + # Uncomment when implementing consistent_snapshot + # if self._consistent_snapshot: + # version = self._metadata["snapshot"].role(targets_role)["version"] + # else: + # version = None # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) @@ -406,7 +414,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: temp_obj, targets_role, parent_role ) # break? should we break after first successful download? 
- except Exception as exception: + except Exception: # TODO: do something with exceptions temp_obj.close() raise @@ -420,6 +428,9 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: temp_obj.close() def _verify_root(self, temp_obj: TextIO) -> RootWrapper: + """ + TODO + """ intermediate_root = RootWrapper.from_json_object(temp_obj) @@ -444,6 +455,9 @@ def _verify_root(self, temp_obj: TextIO) -> RootWrapper: return intermediate_root def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: + """ + TODO + """ intermediate_timestamp = TimestampWrapper.from_json_object(temp_obj) # Check for an arbitrary software attack @@ -453,7 +467,7 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: ) # Check for a rollback attack. - if self._metadata["timestamp"]: + if self._metadata.get("timestamp"): if ( intermediate_timestamp.signed.version <= self._metadata["timestamp"].version @@ -465,7 +479,7 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: self._metadata["timestamp"].version(), ) - if self._metadata["snapshot"]: + if self._metadata.get("snapshot"): if ( intermediate_timestamp.snapshot.version <= self._metadata["timestamp"].snapshot["version"] @@ -482,6 +496,9 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: return intermediate_timestamp def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: + """ + TODO + """ # Check against timestamp metadata if self._metadata["timestamp"].snapshot.get("hash"): @@ -505,7 +522,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: ) # Check for a rollback attack - if self._metadata["snapshot"]: + if self._metadata.get("snapshot"): for target_role in intermediate_snapshot.signed.meta: if ( target_role["version"] @@ -521,6 +538,9 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: def _verify_targets( self, temp_obj: TextIO, filename: str, parent_role: str ) -> TargetsWrapper: + """ + TODO + """ # Check against 
timestamp metadata if self._metadata["snapshot"].role(filename).get("hash"): @@ -547,12 +567,19 @@ def _verify_targets( return intermediate_targets - def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: + @staticmethod + def _verify_target_file(temp_obj: BinaryIO, targetinfo: Dict) -> None: + """ + TODO + """ _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) def _preorder_depth_first_walk(self, target_filepath) -> Dict: + """ + TODO + """ target = None role_names = [("targets", "root")] @@ -577,7 +604,8 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: self._load_targets(role_name, parent_role) # Skip any visited current role to prevent cycles. if (role_name, parent_role) in visited_role_names: - logger.debug(f"Skipping visited current role {role_name}") + msg = f"Skipping visited current role {role_name}" + logger.debug(msg) continue # The metadata for 'role_name' must be downloaded/updated before @@ -590,8 +618,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # refresh_all_delegated_roles=False) role_metadata = self._metadata[role_name] - targets = role_metadata.targets - target = targets.get(target_filepath) + target = role_metadata.targets.get(target_filepath) # After preorder check, add current role to set of visited roles. 
visited_role_names.add((role_name, parent_role)) @@ -615,10 +642,11 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: child_role["terminating"] and child_role_name is not None ): - logger.debug( - "Adding child role " + repr(child_role_name) + msg = ( + f"Adding child role {child_role_name}.\n", + "Not backtracking to other roles.", ) - logger.debug("Not backtracking to other roles.") + logger.debug(msg) role_names = [] child_roles_to_visit.append( (child_role_name, role_name) @@ -626,14 +654,12 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: break if child_role_name is None: - logger.debug( - "Skipping child role " + repr(child_role_name) - ) + msg = f"Skipping child role {child_role_name}" + logger.debug(msg) else: - logger.debug( - "Adding child role " + repr(child_role_name) - ) + msg = f"Adding child role {child_role_name}" + logger.debug(msg) child_roles_to_visit.append( (child_role_name, role_name) ) @@ -645,20 +671,21 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: role_names.extend(child_roles_to_visit) else: - logger.debug("Found target in current role " + repr(role_name)) + msg = f"Found target in current role {role_name}" + logger.debug(msg) if ( target is None and number_of_delegations == 0 and len(role_names) > 0 ): - logger.debug( - repr(len(role_names)) - + " roles left to visit, " - + "but allowed to visit at most " - + repr(tuf.settings.MAX_NUMBER_OF_DELEGATIONS) - + " delegations." 
+ msg = ( + f"{len(role_names)} roles left to visit, ", + "but allowed to visit at most ", + f"{tuf.settings.MAX_NUMBER_OF_DELEGATIONS}", + " delegations.", ) + logger.debug(msg) return {"filepath": target_filepath, "fileinfo": target} @@ -761,7 +788,9 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: def _check_file_length(file_object, trusted_file_length): - + """ + TODO + """ file_object.seek(0, 2) observed_length = file_object.tell() @@ -776,7 +805,9 @@ def _check_file_length(file_object, trusted_file_length): def _check_hashes(file_object, trusted_hashes): - + """ + TODO + """ # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply # return. for algorithm, trusted_hash in trusted_hashes.items(): @@ -800,7 +831,9 @@ def _check_hashes(file_object, trusted_hashes): def _get_target_hash(target_filepath, hash_function="sha256"): - + """ + TODO + """ # Calculate the hash of the filepath to determine which bin to find the # target. The client currently assumes the repository (i.e., repository # tool) uses 'hash_function' to generate hashes and UTF-8. @@ -813,6 +846,9 @@ def _get_target_hash(target_filepath, hash_function="sha256"): def neither_403_nor_404(mirror_error): + """ + TODO + """ if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): if mirror_error.status_code in {403, 404}: return False From df6c319c817222c8223a08aa0461aa8ca2108224 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 22:59:25 +0200 Subject: [PATCH 10/86] Disable exceptions related pylint checks Temporary disable (inline) try-except-raise and broad-except warnings in the new Updater code until client exception handling is revised (#1312). 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 2d9dfcf5b1..9900e6ffef 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -160,6 +160,7 @@ def download_target(self, target: Dict, destination_directory: str): destination_directory, target["filepath"] ) securesystemslib.util.persist_temp_file(temp_obj, filepath) + # pylint: disable=try-except-raise except Exception: # TODO: do something with exceptions raise @@ -182,6 +183,7 @@ def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: temp_obj.seek(0) yield temp_obj + # pylint: disable=broad-except except Exception as exception: file_mirror_errors[file_mirror] = exception @@ -209,7 +211,7 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: temp_obj.seek(0) yield temp_obj - + # pylint: disable=broad-except except Exception as exception: file_mirror_errors[file_mirror] = exception @@ -280,7 +282,7 @@ def _load_root(self) -> None: for temp_obj in mirror_download: try: verified_root = self._verify_root(temp_obj) - + # pylint: disable=try-except-raise except Exception: raise From d472989011a131397fd00f55f983ec8ab1ef3175 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 22 Mar 2021 23:13:53 +0200 Subject: [PATCH 11/86] Disable undefined-loop-variable checks Temporary disable (inline) undefined-loop-variable pylint checks in the new Updater code until the download functionality is revised (#1307). 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 9900e6ffef..10fdcc415f 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -299,7 +299,7 @@ def _load_root(self) -> None: try: verified_root.expires() except tuf.exceptions.ExpiredMetadataError: - temp_obj.close() + temp_obj.close() # pylint: disable=undefined-loop-variable # 1.9. If the timestamp and / or snapshot keys have been rotated, # then delete the trusted timestamp and snapshot metadata files. @@ -328,7 +328,7 @@ def _load_root(self) -> None: "root" ].signed.consistent_snapshot - temp_obj.close() + temp_obj.close() # pylint: disable=undefined-loop-variable def _load_timestamp(self) -> None: """ @@ -353,7 +353,7 @@ def _load_timestamp(self) -> None: self._get_full_meta_name("timestamp.json") ) - temp_obj.close() + temp_obj.close() # pylint: disable=undefined-loop-variable def _load_snapshot(self) -> None: """ @@ -388,7 +388,7 @@ def _load_snapshot(self) -> None: self._get_full_meta_name("snapshot.json") ) - temp_obj.close() + temp_obj.close() # pylint: disable=undefined-loop-variable def _load_targets(self, targets_role: str, parent_role: str) -> None: """ @@ -427,7 +427,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: self._get_full_meta_name(targets_role, extension=".json") ) - temp_obj.close() + temp_obj.close() # pylint: disable=undefined-loop-variable def _verify_root(self, temp_obj: TextIO) -> RootWrapper: """ From 861bef6cf721addbd4867c4acff3fc9d82c9e4fa Mon Sep 17 00:00:00 2001 From: Velichka A Date: Wed, 31 Mar 2021 17:20:05 +0300 Subject: [PATCH 12/86] Update updater_rework.py Fixes imports to be vendoring compatible Signed-off-by: Velichka Atanasova --- tuf/client_rework/updater_rework.py | 75 +++++++++++++---------------- 1 file changed, 33 
insertions(+), 42 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 10fdcc415f..ef7d33c648 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -10,25 +10,16 @@ import fnmatch import logging import os + from typing import BinaryIO, Dict, Optional, TextIO -import securesystemslib.exceptions -import securesystemslib.util +from securesystemslib import exceptions, util -import tuf.download -import tuf.exceptions -import tuf.formats -import tuf.mirrors -import tuf.settings +from tuf import download, exceptions, formats, mirrors, settings from tuf.client.fetcher import FetcherInterface from tuf.requests_fetcher import RequestsFetcher -from .metadata_wrapper import ( - RootWrapper, - SnapshotWrapper, - TargetsWrapper, - TimestampWrapper, -) +from .metadata_wrapper import RootWrapper, SnapshotWrapper, TargetsWrapper, TimestampWrapper # Globals logger = logging.getLogger(__name__) @@ -132,7 +123,7 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: # This exception will occur if the target does not exist # locally. 
- except securesystemslib.exceptions.StorageError: + except exceptions.StorageError: updated_targets.append(target) updated_targetpaths.append(target_filepath) break @@ -159,7 +150,7 @@ def download_target(self, target: Dict, destination_directory: str): filepath = os.path.join( destination_directory, target["filepath"] ) - securesystemslib.util.persist_temp_file(temp_obj, filepath) + util.persist_temp_file(temp_obj, filepath) # pylint: disable=try-except-raise except Exception: # TODO: do something with exceptions @@ -169,14 +160,14 @@ def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: """ Download metadata file from the list of metadata mirrors """ - file_mirrors = tuf.mirrors.get_list_of_mirrors( + file_mirrors = mirrors.get_list_of_mirrors( "meta", filename, self._mirrors ) file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = tuf.download.unsafe_download( + temp_obj = download.unsafe_download( file_mirror, upper_length, self._fetcher ) @@ -189,7 +180,7 @@ def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: finally: if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( + raise exceptions.NoWorkingMirrorError( file_mirror_errors ) @@ -198,14 +189,14 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: Download target file from the list of target mirrors """ # full_filename = _get_full_name(filename) - file_mirrors = tuf.mirrors.get_list_of_mirrors( + file_mirrors = mirrors.get_list_of_mirrors( "target", fileinfo["filepath"], self._mirrors ) file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = tuf.download.safe_download( + temp_obj = download.safe_download( file_mirror, fileinfo["fileinfo"]["length"], self._fetcher ) @@ -217,7 +208,7 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: finally: if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( + raise exceptions.NoWorkingMirrorError( file_mirror_errors ) @@ -233,7 
+224,7 @@ def _get_full_meta_name( else: filename = str(version) + "." + role + extension return os.path.join( - tuf.settings.repositories_directory, + settings.repositories_directory, self._repository_name, "metadata", "current", @@ -269,14 +260,14 @@ def _load_root(self) -> None: # 1.1. Let N denote the version number of the trusted # root metadata file. lower_bound = self._metadata["root"].version - upper_bound = lower_bound + tuf.settings.MAX_NUMBER_ROOT_ROTATIONS + upper_bound = lower_bound + settings.MAX_NUMBER_ROOT_ROTATIONS verified_root = None for next_version in range(lower_bound, upper_bound): try: mirror_download = self._mirror_meta_download( self._get_relative_meta_name("root", version=next_version), - tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, + settings.DEFAULT_ROOT_REQUIRED_LENGTH, ) for temp_obj in mirror_download: @@ -286,7 +277,7 @@ def _load_root(self) -> None: except Exception: raise - except tuf.exceptions.NoWorkingMirrorError as exception: + except exceptions.NoWorkingMirrorError as exception: for mirror_error in exception.mirror_errors.values(): if neither_403_nor_404(mirror_error): temp_obj.close() @@ -298,7 +289,7 @@ def _load_root(self) -> None: # than the expiration timestamp in the trusted root metadata file try: verified_root.expires() - except tuf.exceptions.ExpiredMetadataError: + except exceptions.ExpiredMetadataError: temp_obj.close() # pylint: disable=undefined-loop-variable # 1.9. 
If the timestamp and / or snapshot keys have been rotated, @@ -336,7 +327,7 @@ def _load_timestamp(self) -> None: """ # TODO Check if timestamp exists locally for temp_obj in self._mirror_meta_download( - "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH + "timestamp.json", settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH ): try: verified_tampstamp = self._verify_timestamp(temp_obj) @@ -362,7 +353,7 @@ def _load_snapshot(self) -> None: try: length = self._metadata["timestamp"].snapshot["length"] except KeyError: - length = tuf.settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH + length = settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH # Uncomment when implementing consistent_snapshot # if self._consistent_snapshot: @@ -397,7 +388,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: try: length = self._metadata["snapshot"].role(targets_role)["length"] except KeyError: - length = tuf.settings.DEFAULT_TARGETS_REQUIRED_LENGTH + length = settings.DEFAULT_TARGETS_REQUIRED_LENGTH # Uncomment when implementing consistent_snapshot # if self._consistent_snapshot: @@ -448,7 +439,7 @@ def _verify_root(self, temp_obj: TextIO) -> RootWrapper: # Check for a rollback attack. 
if intermediate_root.version < trusted_root.version: temp_obj.close() - raise tuf.exceptions.ReplayedMetadataError( + raise exceptions.ReplayedMetadataError( "root", intermediate_root.version(), trusted_root.version() ) # Note that the expiration of the new (intermediate) root metadata @@ -475,7 +466,7 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: <= self._metadata["timestamp"].version ): temp_obj.close() - raise tuf.exceptions.ReplayedMetadataError( + raise exceptions.ReplayedMetadataError( "root", intermediate_timestamp.version(), self._metadata["timestamp"].version(), @@ -487,7 +478,7 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: <= self._metadata["timestamp"].snapshot["version"] ): temp_obj.close() - raise tuf.exceptions.ReplayedMetadataError( + raise exceptions.ReplayedMetadataError( "root", intermediate_timestamp.snapshot.version(), self._metadata["snapshot"].version(), @@ -515,7 +506,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: != self._metadata["timestamp"].snapshot["version"] ): temp_obj.close() - raise tuf.exceptions.BadVersionNumberError + raise exceptions.BadVersionNumberError # Check for an arbitrary software attack trusted_root = self._metadata["root"] @@ -531,7 +522,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: != self._metadata["snapshot"].meta[target_role]["version"] ): temp_obj.close() - raise tuf.exceptions.BadVersionNumberError + raise exceptions.BadVersionNumberError intermediate_snapshot.expires() @@ -556,7 +547,7 @@ def _verify_targets( != self._metadata["snapshot"].role(filename)["version"] ): temp_obj.close() - raise tuf.exceptions.BadVersionNumberError + raise exceptions.BadVersionNumberError # Check for an arbitrary software attack parent_role = self._metadata[parent_role] @@ -586,12 +577,12 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: target = None role_names = [("targets", "root")] visited_role_names = set() - 
number_of_delegations = tuf.settings.MAX_NUMBER_OF_DELEGATIONS + number_of_delegations = settings.MAX_NUMBER_OF_DELEGATIONS # Ensure the client has the most up-to-date version of 'targets.json'. - # Raise 'tuf.exceptions.NoWorkingMirrorError' if the changed metadata + # Raise 'exceptions.NoWorkingMirrorError' if the changed metadata # cannot be successfully downloaded and - # 'tuf.exceptions.RepositoryError' if the referenced metadata is + # 'exceptions.RepositoryError' if the referenced metadata is # missing. Target methods such as this one are called after the # top-level metadata have been refreshed (i.e., updater.refresh()). # self._update_metadata_if_changed('targets') @@ -684,7 +675,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: msg = ( f"{len(role_names)} roles left to visit, ", "but allowed to visit at most ", - f"{tuf.settings.MAX_NUMBER_OF_DELEGATIONS}", + f"{settings.MAX_NUMBER_OF_DELEGATIONS}", " delegations.", ) logger.debug(msg) @@ -780,7 +771,7 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: # 'role_name' should have been validated when it was downloaded. # The 'paths' or 'path_hash_prefixes' fields should not be missing, # so we raise a format error here in case they are both missing. - raise tuf.exceptions.FormatError( + raise exceptions.FormatError( repr(child_role_name) + " " 'has neither a "paths" nor "path_hash_prefixes". At least' " one of these attributes must be present." @@ -801,7 +792,7 @@ def _check_file_length(file_object, trusted_file_length): # ensures that a downloaded file strictly matches a known, or trusted, # file length. 
if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError( + raise exceptions.DownloadLengthMismatchError( trusted_file_length, observed_length ) @@ -851,7 +842,7 @@ def neither_403_nor_404(mirror_error): """ TODO """ - if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if isinstance(mirror_error, exceptions.FetcherHTTPError): if mirror_error.status_code in {403, 404}: return False return True From 58f9b4c074c2dad2e122b3447d76bea3d2d3482b Mon Sep 17 00:00:00 2001 From: Velichka Atanasova Date: Mon, 5 Apr 2021 14:57:41 +0300 Subject: [PATCH 13/86] Address CI failure Fixes isort issue. Signed-off-by: Velichka Atanasova --- tuf/client_rework/updater_rework.py | 39 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index ef7d33c648..c7820a4ab4 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -10,16 +10,21 @@ import fnmatch import logging import os - from typing import BinaryIO, Dict, Optional, TextIO -from securesystemslib import exceptions, util +from securesystemslib import exceptions as sslib_exceptions +from securesystemslib import hash as sslib_hash +from securesystemslib import util as sslib_util -from tuf import download, exceptions, formats, mirrors, settings +from tuf import download, exceptions, mirrors, requests_fetcher, settings from tuf.client.fetcher import FetcherInterface -from tuf.requests_fetcher import RequestsFetcher -from .metadata_wrapper import RootWrapper, SnapshotWrapper, TargetsWrapper, TimestampWrapper +from .metadata_wrapper import ( + RootWrapper, + SnapshotWrapper, + TargetsWrapper, + TimestampWrapper, +) # Globals logger = logging.getLogger(__name__) @@ -54,7 +59,7 @@ def __init__( self._metadata = {} if fetcher is None: - self._fetcher = RequestsFetcher() + self._fetcher = requests_fetcher.RequestsFetcher() else: self._fetcher = 
fetcher @@ -117,13 +122,13 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: for algorithm, digest in target["fileinfo"]["hashes"].items(): digest_object = None try: - digest_object = securesystemslib.hash.digest_filename( + digest_object = sslib_hash.digest_filename( target_filepath, algorithm=algorithm ) # This exception will occur if the target does not exist # locally. - except exceptions.StorageError: + except sslib_exceptions.StorageError: updated_targets.append(target) updated_targetpaths.append(target_filepath) break @@ -150,7 +155,7 @@ def download_target(self, target: Dict, destination_directory: str): filepath = os.path.join( destination_directory, target["filepath"] ) - util.persist_temp_file(temp_obj, filepath) + sslib_util.persist_temp_file(temp_obj, filepath) # pylint: disable=try-except-raise except Exception: # TODO: do something with exceptions @@ -180,9 +185,7 @@ def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: finally: if file_mirror_errors: - raise exceptions.NoWorkingMirrorError( - file_mirror_errors - ) + raise exceptions.NoWorkingMirrorError(file_mirror_errors) def _mirror_target_download(self, fileinfo: str) -> BinaryIO: """ @@ -208,9 +211,7 @@ def _mirror_target_download(self, fileinfo: str) -> BinaryIO: finally: if file_mirror_errors: - raise exceptions.NoWorkingMirrorError( - file_mirror_errors - ) + raise exceptions.NoWorkingMirrorError(file_mirror_errors) def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -804,7 +805,7 @@ def _check_hashes(file_object, trusted_hashes): # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply # return. 
for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = securesystemslib.hash.digest(algorithm) + digest_object = sslib_hash.digest(algorithm) # Ensure we read from the beginning of the file object # TODO: should we store file position (before the loop) and reset # after we seek about? @@ -814,9 +815,7 @@ def _check_hashes(file_object, trusted_hashes): # Raise an exception if any of the hashes are incorrect. if trusted_hash != computed_hash: - raise securesystemslib.exceptions.BadHashError( - trusted_hash, computed_hash - ) + raise sslib_exceptions.BadHashError(trusted_hash, computed_hash) logger.info( "The file's " + algorithm + " hash is" " correct: " + trusted_hash @@ -830,7 +829,7 @@ def _get_target_hash(target_filepath, hash_function="sha256"): # Calculate the hash of the filepath to determine which bin to find the # target. The client currently assumes the repository (i.e., repository # tool) uses 'hash_function' to generate hashes and UTF-8. - digest_object = securesystemslib.hash.digest(hash_function) + digest_object = sslib_hash.digest(hash_function) encoded_target_filepath = target_filepath.encode("utf-8") digest_object.update(encoded_target_filepath) target_filepath_hash = digest_object.hexdigest() From 35587c4a09102d6ee7a15aa7b38ecae677b9683c Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 25 Mar 2021 16:38:03 +0200 Subject: [PATCH 14/86] Move download modules inside client directory The modules performing network download are used only by the client side of TUF. Move them inside the client directory for the refactored client. Move the _mirror_*download functions from Updater to mirrors.py. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 323 ++++++++++++++++++++++++++ tuf/client_rework/fetcher.py | 38 +++ tuf/client_rework/mirrors.py | 194 ++++++++++++++++ tuf/client_rework/requests_fetcher.py | 173 ++++++++++++++ tuf/client_rework/updater_rework.py | 83 ++----- 5 files changed, 750 insertions(+), 61 deletions(-) create mode 100644 tuf/client_rework/download.py create mode 100644 tuf/client_rework/fetcher.py create mode 100644 tuf/client_rework/mirrors.py create mode 100644 tuf/client_rework/requests_fetcher.py diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py new file mode 100644 index 0000000000..2d946ef891 --- /dev/null +++ b/tuf/client_rework/download.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + download.py + + + February 21, 2012. Based on previous version by Geremy Condra. + + + Konstantin Andrianov + Vladimir Diaz + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Download metadata and target files and check their validity. The hash and + length of a downloaded file has to match the hash and length supplied by the + metadata of that file. +""" + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +import logging +import timeit +import tempfile + +import securesystemslib +import securesystemslib.util +import six + +import tuf +import tuf.exceptions +import tuf.formats + +# See 'log.py' to learn how logging is handled in TUF. 
+logger = logging.getLogger(__name__) + + +def safe_download(url, required_length, fetcher): + """ + + Given the 'url' and 'required_length' of the desired file, open a connection + to 'url', download it, and return the contents of the file. Also ensure + the length of the downloaded file matches 'required_length' exactly. + tuf.download.unsafe_download() may be called if an upper download limit is + preferred. + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. This is an exact + limit. + + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True) + + + + + +def unsafe_download(url, required_length, fetcher): + """ + + Given the 'url' and 'required_length' of the desired file, open a connection + to 'url', download it, and return the contents of the file. Also ensure + the length of the downloaded file is up to 'required_length', and no larger. + tuf.download.safe_download() may be called if an exact download limit is + preferred. + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. 
This is an upper + limit. + + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False) + + + + + +def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): + """ + + Given the url and length of the desired file, this function opens a + connection to 'url' and downloads the file while ensuring its length + matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, + the file's length is not checked and a slow retrieval exception is raised + if the downloaded rate falls below the acceptable rate). + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. 
+ + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in a + # url with their single-character equivalent. A back-slash may be encoded as + # %5c in the url, which should also be replaced with a forward slash. + url = six.moves.urllib.parse.unquote(url).replace('\\', '/') + logger.info('Downloading: ' + repr(url)) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. + temp_file = tempfile.TemporaryFile() + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = number_of_bytes_received / seconds_spent_receiving + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + logger.debug('The average download speed dropped below the minimum' + ' average download speed set in tuf.settings.py. Stopping the' + ' download!') + break + + else: + logger.debug('The average download speed has not dipped below the' + ' minimum average download speed set in tuf.settings.py.') + + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length(number_of_bytes_received, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed) + + except Exception: + # Close 'temp_file'. Any written data is lost. 
+ temp_file.close() + logger.debug('Could not download URL: ' + repr(url)) + raise + + else: + return temp_file + + + + +def _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=True, + average_download_speed=None): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its metadata. + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + required_length. + + tuf.exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of the' + ' expected ' + str(required_length) + ' bytes.') + + else: + difference_in_bytes = abs(total_downloaded - required_length) + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? 
+ if STRICT_REQUIRED_LENGTH: + logger.info('Downloaded ' + str(total_downloaded) + ' bytes, but' + ' expected ' + str(required_length) + ' bytes. There is a difference' + ' of ' + str(difference_in_bytes) + ' bytes.') + + # If the average download speed is below a certain threshold, we flag + # this as a possible slow-retrieval attack. + logger.debug('Average download speed: ' + repr(average_download_speed)) + logger.debug('Minimum average download speed: ' + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED)) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug('Good average download speed: ' + + repr(average_download_speed) + ' bytes per second') + + raise tuf.exceptions.DownloadLengthMismatchError(required_length, total_downloaded) + + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # Timestamp or Root metadata, for which we have no signed metadata; so, + # we must guess a reasonable required_length for it. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug('Good average download speed: ' + + repr(average_download_speed) + ' bytes per second') + + logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of an' + ' upper limit of ' + str(required_length) + ' bytes.') diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py new file mode 100644 index 0000000000..8768bdd4b9 --- /dev/null +++ b/tuf/client_rework/fetcher.py @@ -0,0 +1,38 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. 
+""" + +# Imports +import abc + +# Classes +class FetcherInterface(): + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py new file mode 100644 index 0000000000..a9e4dd266b --- /dev/null +++ b/tuf/client_rework/mirrors.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + mirrors.py + + + Konstantin Andrianov. + Derived from original mirrors.py written by Geremy Condra. + + + March 12, 2012. + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Extract a list of mirror urls corresponding to the file type and the location + of the file with respect to the base url. +""" + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. 
+from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +from typing import TextIO, BinaryIO, Dict + +import os + +import tuf +import tuf.formats +import tuf.client_rework.download as download + +import securesystemslib +import six + +# The type of file to be downloaded from a repository. The +# 'get_list_of_mirrors' function supports these file types. +_SUPPORTED_FILE_TYPES = ['meta', 'target'] + + +def get_list_of_mirrors(file_type, file_path, mirrors_dict): + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. + + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. + + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + mirrors_dict: + A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where + keys are strings and values are MIRROR_SCHEMA. An example format + of MIRROR_SCHEMA: + + {'url_prefix': 'http://localhost:8001', + 'metadata_path': 'metadata/', + 'targets_path': 'targets/', + 'confined_target_dirs': ['targets/snapshot1/', ...], + 'custom': {...}} + + The 'custom' field is optional. + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. 
+ if file_type not in _SUPPORTED_FILE_TYPES: + raise securesystemslib.exceptions.Error('Invalid file_type argument.' + ' Supported file types: ' + repr(_SUPPORTED_FILE_TYPES)) + path_key = 'metadata_path' if file_type == 'meta' else 'targets_path' + + # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve + # readability). This function checks whether a mirror should serve a file to + # the client. A client may be confined to certain paths on a repository + # mirror when fetching target files. This field may be set by the client + # when the repository mirror is added to the 'tuf.client.updater.Updater' + # object. + in_confined_directory = securesystemslib.util.file_in_confined_directories + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(mirrors_dict): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == 'targets_path': + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get('confined_target_dirs') + # confined_target_dirs is an optional field + if confined_target_dirs and not in_confined_directory(full_filepath, + confined_target_dirs): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. + # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info['url_prefix'], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. 
+ list_of_mirrors.append(url.replace('\\', '/')) + + return list_of_mirrors + + +def _mirror_meta_download(filename: str, upper_length: int, + mirrors_config: Dict, + fetcher: "FetcherInterface") -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = get_list_of_mirrors('meta', filename, mirrors_config) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.unsafe_download( + file_mirror, + upper_length, + fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) + + +def _mirror_target_download(fileinfo: str, mirrors_config: Dict, + fetcher: "FetcherInterface") -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + # full_filename = _get_full_name(filename) + file_mirrors = get_list_of_mirrors('target', fileinfo['filepath'], + mirrors_config) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.safe_download( + file_mirror, + fileinfo['fileinfo']['length'], + fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py new file mode 100644 index 0000000000..8074890d25 --- /dev/null +++ b/tuf/client_rework/requests_fetcher.py @@ -0,0 +1,173 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. 
+""" + +# Imports +import requests +import six +import logging +import time + +import urllib3.exceptions + +import tuf.exceptions +import tuf.settings + +from tuf.client_rework.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classess +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. + + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. Some cookies may not be HTTP-safe. + self._sessions = {} + + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. 
+ session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get(url, stream=True, + timeout=tuf.settings.SOCKET_TIMEOUT) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + + + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not + # wish to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of + # time so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, required_length - bytes_received) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug('Downloaded ' + repr(bytes_received) + '/' + + repr(required_length) + ' bytes.') + + # Finally, we signal that the download is complete. 
+ break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. + parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + 'Could not get scheme and hostname from URL: ' + url) + + session_index = parsed_url.scheme + '+' + parsed_url.hostname + + logger.debug('url: ' + url) + logger.debug('session index: ' + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers['User-Agent'] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent + session.headers.update({ + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + 'Accept-Encoding': 'identity', + # The TUF user agent. 
+ 'User-Agent': tuf_user_agent}) + + logger.debug('Made new session for ' + session_index) + + else: + logger.debug('Reusing session for ' + session_index) + + return session diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index c7820a4ab4..8ddde1e4eb 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -16,8 +16,9 @@ from securesystemslib import hash as sslib_hash from securesystemslib import util as sslib_util -from tuf import download, exceptions, mirrors, requests_fetcher, settings +from tuf import exceptions, settings from tuf.client.fetcher import FetcherInterface +from tuf.client_rework import download, mirrors, requests_fetcher from .metadata_wrapper import ( RootWrapper, @@ -146,9 +147,11 @@ def download_target(self, target: Dict, destination_directory: str): This method performs the actual download of the specified target. The file is saved to the 'destination_directory' argument. """ - try: - for temp_obj in self._mirror_target_download(target): + for temp_obj in mirrors._mirror_target_download( + target, self._mirrors, self._fetcher + ): + self._verify_target_file(temp_obj, target) # break? should we break after first successful download? 
@@ -161,58 +164,6 @@ def download_target(self, target: Dict, destination_directory: str): # TODO: do something with exceptions raise - def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = mirrors.get_list_of_mirrors( - "meta", filename, self._mirrors - ) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.unsafe_download( - file_mirror, upper_length, self._fetcher - ) - - temp_obj.seek(0) - yield temp_obj - - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) - - def _mirror_target_download(self, fileinfo: str) -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - # full_filename = _get_full_name(filename) - file_mirrors = mirrors.get_list_of_mirrors( - "target", fileinfo["filepath"], self._mirrors - ) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.safe_download( - file_mirror, fileinfo["fileinfo"]["length"], self._fetcher - ) - - temp_obj.seek(0) - yield temp_obj - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) - def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None ) -> str: @@ -266,9 +217,11 @@ def _load_root(self) -> None: verified_root = None for next_version in range(lower_bound, upper_bound): try: - mirror_download = self._mirror_meta_download( + mirror_download = mirrors._mirror_meta_download( self._get_relative_meta_name("root", version=next_version), settings.DEFAULT_ROOT_REQUIRED_LENGTH, + self._mirrors, + self._fetcher, ) for temp_obj in mirror_download: @@ -327,9 +280,13 @@ def 
_load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in self._mirror_meta_download( - "timestamp.json", settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH + for temp_obj in mirrors._mirror_meta_download( + "timestamp.json", + settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, + self._mirrors, + self._fetcher, ): + try: verified_tampstamp = self._verify_timestamp(temp_obj) # break? should we break after first successful download? @@ -364,7 +321,10 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in self._mirror_meta_download("snapshot.json", length): + for temp_obj in mirrors._mirror_meta_download( + "snapshot.json", length, self._mirrors, self._fetcher + ): + try: verified_snapshot = self._verify_snapshot(temp_obj) # break? should we break after first successful download? @@ -400,9 +360,10 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) - for temp_obj in self._mirror_meta_download( - targets_role + ".json", length + for temp_obj in mirrors._mirror_meta_download( + targets_role + ".json", length, self._mirrors, self._fetcher ): + try: verified_targets = self._verify_targets( temp_obj, targets_role, parent_role From 81747edf97ff03fbe028fc993c025bcabffb8b77 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 12:38:06 +0300 Subject: [PATCH 15/86] Remove (un)safe_download functions The two functions safe/unsafe_download differ only by setting a single boolean flag. Remove them and call directly _download_file instead. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 103 ++-------------------------------- tuf/client_rework/mirrors.py | 7 ++- 2 files changed, 10 insertions(+), 100 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index 2d946ef891..4cc855f372 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -47,103 +47,7 @@ logger = logging.getLogger(__name__) -def safe_download(url, required_length, fetcher): - """ - - Given the 'url' and 'required_length' of the desired file, open a connection - to 'url', download it, and return the contents of the file. Also ensure - the length of the downloaded file matches 'required_length' exactly. - tuf.download.unsafe_download() may be called if an upper download limit is - preferred. - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. This is an exact - limit. - - fetcher: - An object implementing FetcherInterface that performs the network IO - operations. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
- securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True) - - - - - -def unsafe_download(url, required_length, fetcher): - """ - - Given the 'url' and 'required_length' of the desired file, open a connection - to 'url', download it, and return the contents of the file. Also ensure - the length of the downloaded file is up to 'required_length', and no larger. - tuf.download.safe_download() may be called if an exact download limit is - preferred. - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. This is an upper - limit. - - fetcher: - An object implementing FetcherInterface that performs the network IO - operations. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
- securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False) - - - - - -def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): +def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): """ Given the url and length of the desired file, this function opens a @@ -180,6 +84,11 @@ def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): A file object that points to the contents of 'url'. """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. # This converts it to the common format. 
unquote() replaces %xx escapes in a diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index a9e4dd266b..2b7682645f 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -148,10 +148,11 @@ def _mirror_meta_download(filename: str, upper_length: int, file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = download.unsafe_download( + temp_obj = download.download_file( file_mirror, upper_length, - fetcher) + fetcher, + STRICT_REQUIRED_LENGTH=False) temp_obj.seek(0) yield temp_obj @@ -177,7 +178,7 @@ def _mirror_target_download(fileinfo: str, mirrors_config: Dict, file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = download.safe_download( + temp_obj = download.download_file( file_mirror, fileinfo['fileinfo']['length'], fetcher) From 3f89c018b3dac0dd71d7f8dae8834df66bfde49d Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 12:43:37 +0300 Subject: [PATCH 16/86] Reformat client code Run black and isort over the old modules which were moved inside the client directory: - download.py - fetcher.py - mirrors.py - requests_fetcher.py Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 402 ++++++++++++++------------ tuf/client_rework/fetcher.py | 43 +-- tuf/client_rework/mirrors.py | 319 ++++++++++---------- tuf/client_rework/requests_fetcher.py | 299 +++++++++---------- 4 files changed, 556 insertions(+), 507 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index 4cc855f372..be57224e4c 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -23,17 +23,20 @@ metadata of that file. """ + # Help with Python 3 compatibility, where the print statement is a function, an # implicit relative import is invalid, and the '/' operator performs true # division. Example: print 'hello world' raises a 'SyntaxError' exception. 
-from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) import logging -import timeit import tempfile +import timeit import securesystemslib import securesystemslib.util @@ -48,185 +51,216 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): - """ - - Given the url and length of the desired file, this function opens a - connection to 'url' and downloads the file while ensuring its length - matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, - the file's length is not checked and a slow retrieval exception is raised - if the downloaded rate falls below the acceptable rate). - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. - - STRICT_REQUIRED_LENGTH: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
- securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based - # systems, because they might use back-slashes in place of forward-slashes. - # This converts it to the common format. unquote() replaces %xx escapes in a - # url with their single-character equivalent. A back-slash may be encoded as - # %5c in the url, which should also be replaced with a forward slash. - url = six.moves.urllib.parse.unquote(url).replace('\\', '/') - logger.info('Downloading: ' + repr(url)) - - # This is the temporary file that we will return to contain the contents of - # the downloaded file. - temp_file = tempfile.TemporaryFile() - - average_download_speed = 0 - number_of_bytes_received = 0 - - try: - chunks = fetcher.fetch(url, required_length) - start_time = timeit.default_timer() - for chunk in chunks: - - stop_time = timeit.default_timer() - temp_file.write(chunk) - - # Measure the average download speed. - number_of_bytes_received += len(chunk) - seconds_spent_receiving = stop_time - start_time - average_download_speed = number_of_bytes_received / seconds_spent_receiving - - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - logger.debug('The average download speed dropped below the minimum' - ' average download speed set in tuf.settings.py. Stopping the' - ' download!') - break - - else: - logger.debug('The average download speed has not dipped below the' - ' minimum average download speed set in tuf.settings.py.') - - # Does the total number of downloaded bytes match the required length? - _check_downloaded_length(number_of_bytes_received, required_length, - STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, - average_download_speed=average_download_speed) - - except Exception: - # Close 'temp_file'. Any written data is lost. 
- temp_file.close() - logger.debug('Could not download URL: ' + repr(url)) - raise - - else: - return temp_file - - - - -def _check_downloaded_length(total_downloaded, required_length, - STRICT_REQUIRED_LENGTH=True, - average_download_speed=None): - """ - - A helper function which checks whether the total number of downloaded bytes - matches our expectation. - - - total_downloaded: - The total number of bytes supposedly downloaded for the file in question. - - required_length: - The total number of bytes expected of the file as seen from its metadata. - The Timestamp role is always downloaded without a known file length, and - the Root role when the client cannot download any of the required - top-level roles. In both cases, 'required_length' is actually an upper - limit on the length of the downloaded file. - - STRICT_REQUIRED_LENGTH: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. - - average_download_speed: - The average download speed for the downloaded file. - - - None. - - - securesystemslib.exceptions.DownloadLengthMismatchError, if - STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal - required_length. - - tuf.exceptions.SlowRetrievalError, if the total downloaded was - done in less than the acceptable download speed (as set in - tuf.settings.py). - - - None. - """ - - if total_downloaded == required_length: - logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of the' - ' expected ' + str(required_length) + ' bytes.') - - else: - difference_in_bytes = abs(total_downloaded - required_length) - - # What we downloaded is not equal to the required length, but did we ask - # for strict checking of required length? 
- if STRICT_REQUIRED_LENGTH: - logger.info('Downloaded ' + str(total_downloaded) + ' bytes, but' - ' expected ' + str(required_length) + ' bytes. There is a difference' - ' of ' + str(difference_in_bytes) + ' bytes.') + """ + + Given the url and length of the desired file, this function opens a + connection to 'url' and downloads the file while ensuring its length + matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, + the file's length is not checked and a slow retrieval exception is raised + if the downloaded rate falls below the acceptable rate). + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in a + # url with their single-character equivalent. 
A back-slash may be encoded as + # %5c in the url, which should also be replaced with a forward slash. + url = six.moves.urllib.parse.unquote(url).replace("\\", "/") + logger.info("Downloading: " + repr(url)) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. + temp_file = tempfile.TemporaryFile() + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = ( + number_of_bytes_received / seconds_spent_receiving + ) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + logger.debug( + "The average download speed dropped below the minimum" + " average download speed set in tuf.settings.py. Stopping the" + " download!" + ) + break + + else: + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) + + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length( + number_of_bytes_received, + required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed, + ) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug("Could not download URL: " + repr(url)) + raise - # If the average download speed is below a certain threshold, we flag - # this as a possible slow-retrieval attack. 
- logger.debug('Average download speed: ' + repr(average_download_speed)) - logger.debug('Minimum average download speed: ' + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED)) - - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) - - else: - logger.debug('Good average download speed: ' + - repr(average_download_speed) + ' bytes per second') - - raise tuf.exceptions.DownloadLengthMismatchError(required_length, total_downloaded) + else: + return temp_file + + +def _check_downloaded_length( + total_downloaded, + required_length, + STRICT_REQUIRED_LENGTH=True, + average_download_speed=None, +): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its metadata. + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + required_length. + + tuf.exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. 
+ """ + + if total_downloaded == required_length: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of the" + " expected " + str(required_length) + " bytes." + ) else: - # We specifically disabled strict checking of required length, but we - # will log a warning anyway. This is useful when we wish to download the - # Timestamp or Root metadata, for which we have no signed metadata; so, - # we must guess a reasonable required_length for it. - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) - - else: - logger.debug('Good average download speed: ' + - repr(average_download_speed) + ' bytes per second') - - logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of an' - ' upper limit of ' + str(required_length) + ' bytes.') + difference_in_bytes = abs(total_downloaded - required_length) + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes, but" + " expected " + + str(required_length) + + " bytes. There is a difference" + " of " + str(difference_in_bytes) + " bytes." + ) + + # If the average download speed is below a certain threshold, we flag + # this as a possible slow-retrieval attack. 
+ logger.debug( + "Average download speed: " + repr(average_download_speed) + ) + logger.debug( + "Minimum average download speed: " + + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED) + ) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + raise tuf.exceptions.DownloadLengthMismatchError( + required_length, total_downloaded + ) + + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # Timestamp or Root metadata, for which we have no signed metadata; so, + # we must guess a reasonable required_length for it. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of an" + " upper limit of " + str(required_length) + " bytes." + ) diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py index 8768bdd4b9..2b6de6f837 100644 --- a/tuf/client_rework/fetcher.py +++ b/tuf/client_rework/fetcher.py @@ -7,32 +7,33 @@ # Imports import abc + # Classes -class FetcherInterface(): - """Defines an interface for abstract network download. +class FetcherInterface: + """Defines an interface for abstract network download. - By providing a concrete implementation of the abstract interface, - users of the framework can plug-in their preferred/customized - network stack. - """ + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. 
+ """ - __metaclass__ = abc.ABCMeta + __metaclass__ = abc.ABCMeta - @abc.abstractmethod - def fetch(self, url, required_length): - """Fetches the contents of HTTP/HTTPS url from a remote server. + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. - Ensures the length of the downloaded data is up to 'required_length'. + Ensures the length of the downloaded data is up to 'required_length'. - Arguments: - url: A URL string that represents a file location. - required_length: An integer value representing the file length in bytes. + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. - Raises: - tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. - tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. - Returns: - A bytes iterator - """ - raise NotImplementedError # pragma: no cover + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 2b7682645f..afcbdb9b0f 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -22,174 +22,179 @@ of the file with respect to the base url. """ + # Help with Python 3 compatibility, where the print statement is a function, an # implicit relative import is invalid, and the '/' operator performs true # division. Example: print 'hello world' raises a 'SyntaxError' exception. 
-from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from typing import TextIO, BinaryIO, Dict +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) import os - -import tuf -import tuf.formats -import tuf.client_rework.download as download +from typing import BinaryIO, Dict, TextIO import securesystemslib import six +import tuf +import tuf.client_rework.download as download +import tuf.formats + # The type of file to be downloaded from a repository. The # 'get_list_of_mirrors' function supports these file types. -_SUPPORTED_FILE_TYPES = ['meta', 'target'] +_SUPPORTED_FILE_TYPES = ["meta", "target"] def get_list_of_mirrors(file_type, file_path, mirrors_dict): - """ - - Get a list of mirror urls from a mirrors dictionary, provided the type - and the path of the file with respect to the base url. - - - file_type: - Type of data needed for download, must correspond to one of the strings - in the list ['meta', 'target']. 'meta' for metadata file type or - 'target' for target file type. It should correspond to - NAME_SCHEMA format. - - file_path: - A relative path to the file that corresponds to RELPATH_SCHEMA format. - Ex: 'http://url_prefix/targets_path/file_path' - - mirrors_dict: - A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where - keys are strings and values are MIRROR_SCHEMA. An example format - of MIRROR_SCHEMA: - - {'url_prefix': 'http://localhost:8001', - 'metadata_path': 'metadata/', - 'targets_path': 'targets/', - 'confined_target_dirs': ['targets/snapshot1/', ...], - 'custom': {...}} - - The 'custom' field is optional. - - - securesystemslib.exceptions.Error, on unsupported 'file_type'. - - securesystemslib.exceptions.FormatError, on bad argument. - - - List of mirror urls corresponding to the file_type and file_path. If no - match is found, empty list is returned. 
- """ - - # Checking if all the arguments have appropriate format. - tuf.formats.RELPATH_SCHEMA.check_match(file_path) - tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) - securesystemslib.formats.NAME_SCHEMA.check_match(file_type) - - # Verify 'file_type' is supported. - if file_type not in _SUPPORTED_FILE_TYPES: - raise securesystemslib.exceptions.Error('Invalid file_type argument.' - ' Supported file types: ' + repr(_SUPPORTED_FILE_TYPES)) - path_key = 'metadata_path' if file_type == 'meta' else 'targets_path' - - # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve - # readability). This function checks whether a mirror should serve a file to - # the client. A client may be confined to certain paths on a repository - # mirror when fetching target files. This field may be set by the client - # when the repository mirror is added to the 'tuf.client.updater.Updater' - # object. - in_confined_directory = securesystemslib.util.file_in_confined_directories - - list_of_mirrors = [] - for junk, mirror_info in six.iteritems(mirrors_dict): - # Does mirror serve this file type at all? - path = mirror_info.get(path_key) - if path is None: - continue - - # for targets, ensure directory confinement - if path_key == 'targets_path': - full_filepath = os.path.join(path, file_path) - confined_target_dirs = mirror_info.get('confined_target_dirs') - # confined_target_dirs is an optional field - if confined_target_dirs and not in_confined_directory(full_filepath, - confined_target_dirs): - continue - - # urllib.quote(string) replaces special characters in string using the %xx - # escape. This is done to avoid parsing issues of the URL on the server - # side. Do *NOT* pass URLs with Unicode characters without first encoding - # the URL as UTF-8. We need a long-term solution with #61. 
- # http://bugs.python.org/issue1712522 - file_path = six.moves.urllib.parse.quote(file_path) - url = os.path.join(mirror_info['url_prefix'], path, file_path) - - # The above os.path.join() result as well as input file_path may be - # invalid on windows (might contain both separator types), see #1077. - # Make sure the URL doesn't contain backward slashes on Windows. - list_of_mirrors.append(url.replace('\\', '/')) - - return list_of_mirrors - - -def _mirror_meta_download(filename: str, upper_length: int, - mirrors_config: Dict, - fetcher: "FetcherInterface") -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = get_list_of_mirrors('meta', filename, mirrors_config) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - upper_length, - fetcher, - STRICT_REQUIRED_LENGTH=False) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) - - -def _mirror_target_download(fileinfo: str, mirrors_config: Dict, - fetcher: "FetcherInterface") -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - # full_filename = _get_full_name(filename) - file_mirrors = get_list_of_mirrors('target', fileinfo['filepath'], - mirrors_config) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - fileinfo['fileinfo']['length'], - fetcher) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. 
+ + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. + + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + mirrors_dict: + A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where + keys are strings and values are MIRROR_SCHEMA. An example format + of MIRROR_SCHEMA: + + {'url_prefix': 'http://localhost:8001', + 'metadata_path': 'metadata/', + 'targets_path': 'targets/', + 'confined_target_dirs': ['targets/snapshot1/', ...], + 'custom': {...}} + + The 'custom' field is optional. + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. + if file_type not in _SUPPORTED_FILE_TYPES: + raise securesystemslib.exceptions.Error( + "Invalid file_type argument." + " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) + ) + path_key = "metadata_path" if file_type == "meta" else "targets_path" + + # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve + # readability). This function checks whether a mirror should serve a file to + # the client. A client may be confined to certain paths on a repository + # mirror when fetching target files. This field may be set by the client + # when the repository mirror is added to the 'tuf.client.updater.Updater' + # object. 
+ in_confined_directory = securesystemslib.util.file_in_confined_directories + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(mirrors_dict): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == "targets_path": + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get("confined_target_dirs") + # confined_target_dirs is an optional field + if confined_target_dirs and not in_confined_directory( + full_filepath, confined_target_dirs + ): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. + # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info["url_prefix"], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. 
+ list_of_mirrors.append(url.replace("\\", "/")) + + return list_of_mirrors + + +def _mirror_meta_download( + filename: str, + upper_length: int, + mirrors_config: Dict, + fetcher: "FetcherInterface", +) -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = get_list_of_mirrors("meta", filename, mirrors_config) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.download_file( + file_mirror, upper_length, fetcher, STRICT_REQUIRED_LENGTH=False + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError(file_mirror_errors) + + +def _mirror_target_download( + fileinfo: str, mirrors_config: Dict, fetcher: "FetcherInterface" +) -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + # full_filename = _get_full_name(filename) + file_mirrors = get_list_of_mirrors( + "target", fileinfo["filepath"], mirrors_config + ) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.download_file( + file_mirror, fileinfo["fileinfo"]["length"], fetcher + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError(file_mirror_errors) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py index 8074890d25..6f5e89ec4e 100644 --- a/tuf/client_rework/requests_fetcher.py +++ b/tuf/client_rework/requests_fetcher.py @@ -5,17 +5,16 @@ library. 
""" -# Imports -import requests -import six import logging import time +# Imports +import requests +import six import urllib3.exceptions import tuf.exceptions import tuf.settings - from tuf.client_rework.fetcher import FetcherInterface # Globals @@ -23,151 +22,161 @@ # Classess class RequestsFetcher(FetcherInterface): - """A concrete implementation of FetcherInterface based on the Requests + """A concrete implementation of FetcherInterface based on the Requests library. Attributes: _sessions: A dictionary of Requests.Session objects storing a separate session per scheme+hostname combination. - """ - - def __init__(self): - # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: - # - # "The Session object allows you to persist certain parameters across - # requests. It also persists cookies across all requests made from the - # Session instance, and will use urllib3's connection pooling. So if you're - # making several requests to the same host, the underlying TCP connection - # will be reused, which can result in a significant performance increase - # (see HTTP persistent connection)." - # - # NOTE: We use a separate requests.Session per scheme+hostname combination, - # in order to reuse connections to the same hostname to improve efficiency, - # but avoiding sharing state between different hosts-scheme combinations to - # minimize subtle security issues. Some cookies may not be HTTP-safe. - self._sessions = {} - - - def fetch(self, url, required_length): - """Fetches the contents of HTTP/HTTPS url from a remote server. - - Ensures the length of the downloaded data is up to 'required_length'. - - Arguments: - url: A URL string that represents a file location. - required_length: An integer value representing the file length in bytes. - - Raises: - tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. - tuf.exceptions.FetcherHTTPError: An HTTP error code is received. 
- - Returns: - A bytes iterator - """ - # Get a customized session for each new schema+hostname combination. - session = self._get_session(url) - - # Get the requests.Response object for this URL. - # - # Defer downloading the response body with stream=True. - # Always set the timeout. This timeout value is interpreted by requests as: - # - connect timeout (max delay before first byte is received) - # - read (gap) timeout (max delay between bytes received) - response = session.get(url, stream=True, - timeout=tuf.settings.SOCKET_TIMEOUT) - # Check response status. - try: - response.raise_for_status() - except requests.HTTPError as e: - response.close() - status = e.response.status_code - raise tuf.exceptions.FetcherHTTPError(str(e), status) - - - # Define a generator function to be returned by fetch. This way the caller - # of fetch can differentiate between connection and actual data download - # and measure download times accordingly. - def chunks(): - try: - bytes_received = 0 - while True: - # We download a fixed chunk of data in every round. This is so that we - # can defend against slow retrieval attacks. Furthermore, we do not - # wish to download an extremely large file in one shot. - # Before beginning the round, sleep (if set) for a short amount of - # time so that the CPU is not hogged in the while loop. - if tuf.settings.SLEEP_BEFORE_ROUND: - time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) - - read_amount = min( - tuf.settings.CHUNK_SIZE, required_length - bytes_received) - - # NOTE: This may not handle some servers adding a Content-Encoding - # header, which may cause urllib3 to misbehave: - # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 - data = response.raw.read(read_amount) - bytes_received += len(data) - - # We might have no more data to read. Check number of bytes downloaded. 
- if not data: - logger.debug('Downloaded ' + repr(bytes_received) + '/' + - repr(required_length) + ' bytes.') - - # Finally, we signal that the download is complete. - break - - yield data - - if bytes_received >= required_length: - break - - except urllib3.exceptions.ReadTimeoutError as e: - raise tuf.exceptions.SlowRetrievalError(str(e)) - - finally: - response.close() - - return chunks() - - - - def _get_session(self, url): - """Returns a different customized requests.Session per schema+hostname - combination. """ - # Use a different requests.Session per schema+hostname combination, to - # reuse connections while minimizing subtle security issues. - parsed_url = six.moves.urllib.parse.urlparse(url) - - if not parsed_url.scheme or not parsed_url.hostname: - raise tuf.exceptions.URLParsingError( - 'Could not get scheme and hostname from URL: ' + url) - - session_index = parsed_url.scheme + '+' + parsed_url.hostname - - logger.debug('url: ' + url) - logger.debug('session index: ' + session_index) - - session = self._sessions.get(session_index) - - if not session: - session = requests.Session() - self._sessions[session_index] = session - - # Attach some default headers to every Session. - requests_user_agent = session.headers['User-Agent'] - # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 - tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent - session.headers.update({ - # Tell the server not to compress or modify anything. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives - 'Accept-Encoding': 'identity', - # The TUF user agent. 
- 'User-Agent': tuf_user_agent}) - - logger.debug('Made new session for ' + session_index) - - else: - logger.debug('Reusing session for ' + session_index) - return session + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. Some cookies may not be HTTP-safe. + self._sessions = {} + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. 
This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get( + url, stream=True, timeout=tuf.settings.SOCKET_TIMEOUT + ) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not + # wish to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of + # time so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, + required_length - bytes_received, + ) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug( + "Downloaded " + + repr(bytes_received) + + "/" + + repr(required_length) + + " bytes." + ) + + # Finally, we signal that the download is complete. 
+ break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. + parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + "Could not get scheme and hostname from URL: " + url + ) + + session_index = parsed_url.scheme + "+" + parsed_url.hostname + + logger.debug("url: " + url) + logger.debug("session index: " + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers["User-Agent"] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = ( + "tuf/" + tuf.__version__ + " " + requests_user_agent + ) + session.headers.update( + { + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + "Accept-Encoding": "identity", + # The TUF user agent. + "User-Agent": tuf_user_agent, + } + ) + + logger.debug("Made new session for " + session_index) + + else: + logger.debug("Reusing session for " + session_index) + + return session From 1c6ab320cdcd4038c686f0d5bdfe6984a1af220d Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 12:56:29 +0300 Subject: [PATCH 17/86] Fix pylint C0301: Line too long Fix line length exceeding 80 characters since the black formatter does not wrap lines inside comments.
Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 33 +++++++------- tuf/client_rework/fetcher.py | 12 ++--- tuf/client_rework/mirrors.py | 20 ++++----- tuf/client_rework/requests_fetcher.py | 63 +++++++++++++++------------ 4 files changed, 70 insertions(+), 58 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index be57224e4c..d04cc2baa3 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -94,9 +94,10 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. - # This converts it to the common format. unquote() replaces %xx escapes in a - # url with their single-character equivalent. A back-slash may be encoded as - # %5c in the url, which should also be replaced with a forward slash. + # This converts it to the common format. unquote() replaces %xx escapes in + # a url with their single-character equivalent. A back-slash may be + # encoded as %5c in the url, which should also be replaced with a forward + # slash. url = six.moves.urllib.parse.unquote(url).replace("\\", "/") logger.info("Downloading: " + repr(url)) @@ -125,8 +126,8 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: logger.debug( "The average download speed dropped below the minimum" - " average download speed set in tuf.settings.py. Stopping the" - " download!" + " average download speed set in tuf.settings.py." + " Stopping the download!" ) break @@ -162,15 +163,16 @@ def _check_downloaded_length( ): """ - A helper function which checks whether the total number of downloaded bytes - matches our expectation. + A helper function which checks whether the total number of downloaded + bytes matches our expectation. 
total_downloaded: - The total number of bytes supposedly downloaded for the file in question. + The total number of bytes supposedly downloaded for the file in + question. required_length: - The total number of bytes expected of the file as seen from its metadata. + The total number of bytes expected of the file as seen from its metadata The Timestamp role is always downloaded without a known file length, and the Root role when the client cannot download any of the required top-level roles. In both cases, 'required_length' is actually an upper @@ -221,8 +223,8 @@ def _check_downloaded_length( " of " + str(difference_in_bytes) + " bytes." ) - # If the average download speed is below a certain threshold, we flag - # this as a possible slow-retrieval attack. + # If the average download speed is below a certain threshold, we + # flag this as a possible slow-retrieval attack. logger.debug( "Average download speed: " + repr(average_download_speed) ) @@ -246,10 +248,11 @@ def _check_downloaded_length( ) else: - # We specifically disabled strict checking of required length, but we - # will log a warning anyway. This is useful when we wish to download the - # Timestamp or Root metadata, for which we have no signed metadata; so, - # we must guess a reasonable required_length for it. + # We specifically disabled strict checking of required length, but + # we will log a warning anyway. This is useful when we wish to + # download the Timestamp or Root metadata, for which we have no + # signed metadata; so, we must guess a reasonable required_length + # for it. 
if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: raise tuf.exceptions.SlowRetrievalError(average_download_speed) diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py index 2b6de6f837..8a6cae34d7 100644 --- a/tuf/client_rework/fetcher.py +++ b/tuf/client_rework/fetcher.py @@ -26,14 +26,16 @@ def fetch(self, url, required_length): Ensures the length of the downloaded data is up to 'required_length'. Arguments: - url: A URL string that represents a file location. - required_length: An integer value representing the file length in bytes. + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. Raises: - tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. - tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. Returns: - A bytes iterator + A bytes iterator """ raise NotImplementedError # pragma: no cover diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index afcbdb9b0f..0bdf07e2e6 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -101,12 +101,12 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): ) path_key = "metadata_path" if file_type == "meta" else "targets_path" - # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve - # readability). This function checks whether a mirror should serve a file to - # the client. A client may be confined to certain paths on a repository - # mirror when fetching target files. This field may be set by the client - # when the repository mirror is added to the 'tuf.client.updater.Updater' - # object. + # Reference to 'securesystemslib.util.file_in_confined_directories()' + # (improve readability). 
This function checks whether a mirror should + # serve a file to the client. A client may be confined to certain paths + # on a repository mirror when fetching target files. This field may be set + # by the client when the repository mirror is added to the + # 'tuf.client.updater.Updater' object. in_confined_directory = securesystemslib.util.file_in_confined_directories list_of_mirrors = [] @@ -126,10 +126,10 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): ): continue - # urllib.quote(string) replaces special characters in string using the %xx - # escape. This is done to avoid parsing issues of the URL on the server - # side. Do *NOT* pass URLs with Unicode characters without first encoding - # the URL as UTF-8. We need a long-term solution with #61. + # urllib.quote(string) replaces special characters in string using + # the %xx escape. This is done to avoid parsing issues of the URL + # on the server side. Do *NOT* pass URLs with Unicode characters without + # first encoding the URL as UTF-8. Needed a long-term solution with #61. # http://bugs.python.org/issue1712522 file_path = six.moves.urllib.parse.quote(file_path) url = os.path.join(mirror_info["url_prefix"], path, file_path) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py index 6f5e89ec4e..545a23feff 100644 --- a/tuf/client_rework/requests_fetcher.py +++ b/tuf/client_rework/requests_fetcher.py @@ -26,24 +26,25 @@ class RequestsFetcher(FetcherInterface): library. Attributes: - _sessions: A dictionary of Requests.Session objects storing a separate - session per scheme+hostname combination. + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. 
""" def __init__(self): - # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # http://docs.python-requests.org/en/master/user/advanced/#session-objects: # # "The Session object allows you to persist certain parameters across # requests. It also persists cookies across all requests made from the - # Session instance, and will use urllib3's connection pooling. So if you're - # making several requests to the same host, the underlying TCP connection - # will be reused, which can result in a significant performance increase - # (see HTTP persistent connection)." + # Session instance, and will use urllib3's connection pooling. So if + # you're making several requests to the same host, the underlying TCP + # connection will be reused, which can result in a significant + # performance increase (see HTTP persistent connection)." # - # NOTE: We use a separate requests.Session per scheme+hostname combination, - # in order to reuse connections to the same hostname to improve efficiency, - # but avoiding sharing state between different hosts-scheme combinations to - # minimize subtle security issues. Some cookies may not be HTTP-safe. + # NOTE: We use a separate requests.Session per scheme+hostname + # combination, in order to reuse connections to the same hostname to + # improve efficiency, but avoiding sharing state between different + # hosts-scheme combinations to minimize subtle security issues. + # Some cookies may not be HTTP-safe. self._sessions = {} def fetch(self, url, required_length): @@ -52,15 +53,17 @@ def fetch(self, url, required_length): Ensures the length of the downloaded data is up to 'required_length'. Arguments: - url: A URL string that represents a file location. - required_length: An integer value representing the file length in bytes. + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. 
Raises: - tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. - tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. Returns: - A bytes iterator + A bytes iterator """ # Get a customized session for each new schema+hostname combination. session = self._get_session(url) @@ -68,7 +71,8 @@ def fetch(self, url, required_length): # Get the requests.Response object for this URL. # # Defer downloading the response body with stream=True. - # Always set the timeout. This timeout value is interpreted by requests as: + # Always set the timeout. This timeout value is interpreted by + # requests as: # - connect timeout (max delay before first byte is received) # - read (gap) timeout (max delay between bytes received) response = session.get( @@ -82,18 +86,19 @@ def fetch(self, url, required_length): status = e.response.status_code raise tuf.exceptions.FetcherHTTPError(str(e), status) - # Define a generator function to be returned by fetch. This way the caller - # of fetch can differentiate between connection and actual data download - # and measure download times accordingly. + # Define a generator function to be returned by fetch. This way the + # caller of fetch can differentiate between connection and actual data + # download and measure download times accordingly. def chunks(): try: bytes_received = 0 while True: - # We download a fixed chunk of data in every round. This is so that we - # can defend against slow retrieval attacks. Furthermore, we do not - # wish to download an extremely large file in one shot. - # Before beginning the round, sleep (if set) for a short amount of - # time so that the CPU is not hogged in the while loop. + # We download a fixed chunk of data in every round. This is + # so that we can defend against slow retrieval attacks. 
+ # Furthermore, we do not wish to download an extremely + # large file in one shot. Before beginning the round, sleep + # (if set) for a short amount of time so that the CPU is not + # hogged in the while loop. if tuf.settings.SLEEP_BEFORE_ROUND: time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) @@ -102,13 +107,15 @@ def chunks(): required_length - bytes_received, ) - # NOTE: This may not handle some servers adding a Content-Encoding - # header, which may cause urllib3 to misbehave: + # NOTE: This may not handle some servers adding a + # Content-Encoding header, which may cause urllib3 to + # misbehave: # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 data = response.raw.read(read_amount) bytes_received += len(data) - # We might have no more data to read. Check number of bytes downloaded. + # We might have no more data to read. Check number of bytes + # downloaded. if not data: logger.debug( "Downloaded " From 575c15b3396fdcad1bdc859716c159c783ce2910 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 13:02:24 +0300 Subject: [PATCH 18/86] Fix pylint W0212: protected-access Fix "Access to a protected member _mirror_target_download, _mirror_meta_download of a client class" Signed-off-by: Teodora Sechkova --- tuf/client_rework/mirrors.py | 4 ++-- tuf/client_rework/updater_rework.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 0bdf07e2e6..91b9e87b4f 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -142,7 +142,7 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): return list_of_mirrors -def _mirror_meta_download( +def mirror_meta_download( filename: str, upper_length: int, mirrors_config: Dict, @@ -171,7 +171,7 @@ def _mirror_meta_download( raise tuf.exceptions.NoWorkingMirrorError(file_mirror_errors) -def _mirror_target_download( +def 
mirror_target_download( fileinfo: str, mirrors_config: Dict, fetcher: "FetcherInterface" ) -> BinaryIO: """ diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 8ddde1e4eb..ec783fa4b2 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -148,7 +148,7 @@ def download_target(self, target: Dict, destination_directory: str): The file is saved to the 'destination_directory' argument. """ try: - for temp_obj in mirrors._mirror_target_download( + for temp_obj in mirrors.mirror_target_download( target, self._mirrors, self._fetcher ): @@ -217,7 +217,7 @@ def _load_root(self) -> None: verified_root = None for next_version in range(lower_bound, upper_bound): try: - mirror_download = mirrors._mirror_meta_download( + mirror_download = mirrors.mirror_meta_download( self._get_relative_meta_name("root", version=next_version), settings.DEFAULT_ROOT_REQUIRED_LENGTH, self._mirrors, @@ -280,7 +280,7 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in mirrors._mirror_meta_download( + for temp_obj in mirrors.mirror_meta_download( "timestamp.json", settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, self._mirrors, @@ -321,7 +321,7 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in mirrors._mirror_meta_download( + for temp_obj in mirrors.mirror_meta_download( "snapshot.json", length, self._mirrors, self._fetcher ): @@ -360,7 +360,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) - for temp_obj in mirrors._mirror_meta_download( + for temp_obj in mirrors.mirror_meta_download( targets_role + ".json", length, self._mirrors, self._fetcher ): From 7a717f6179f6d24903c54ec94618e60790558190 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 14:02:25 +0300 Subject: [PATCH 19/86] Fix 
pylint C0103 and W0612 - fix C0103 invalid argument name STRICT_REQUIRED_LENGTH - use 'dummy' as an accepted by pylint unused variable name (W0612) Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 14 +++++++------- tuf/client_rework/mirrors.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index d04cc2baa3..0516d5e5ed 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -50,7 +50,7 @@ logger = logging.getLogger(__name__) -def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): +def download_file(url, required_length, fetcher, strict_required_length=True): """ Given the url and length of the desired file, this function opens a @@ -66,7 +66,7 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): required_length: An integer value representing the length of the file. - STRICT_REQUIRED_LENGTH: + strict_required_length: A Boolean indicator used to signal whether we should perform strict checking of required_length. True by default. We explicitly set this to False when we know that we want to turn this off for downloading the @@ -141,7 +141,7 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): _check_downloaded_length( number_of_bytes_received, required_length, - STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + strict_required_length=strict_required_length, average_download_speed=average_download_speed, ) @@ -158,7 +158,7 @@ def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): def _check_downloaded_length( total_downloaded, required_length, - STRICT_REQUIRED_LENGTH=True, + strict_required_length=True, average_download_speed=None, ): """ @@ -178,7 +178,7 @@ def _check_downloaded_length( top-level roles. In both cases, 'required_length' is actually an upper limit on the length of the downloaded file. 
- STRICT_REQUIRED_LENGTH: + strict_required_length: A Boolean indicator used to signal whether we should perform strict checking of required_length. True by default. We explicitly set this to False when we know that we want to turn this off for downloading the @@ -192,7 +192,7 @@ def _check_downloaded_length( securesystemslib.exceptions.DownloadLengthMismatchError, if - STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + strict_required_length is True and total_downloaded is not equal required_length. tuf.exceptions.SlowRetrievalError, if the total downloaded was @@ -214,7 +214,7 @@ def _check_downloaded_length( # What we downloaded is not equal to the required length, but did we ask # for strict checking of required length? - if STRICT_REQUIRED_LENGTH: + if strict_required_length: logger.info( "Downloaded " + str(total_downloaded) + " bytes, but" " expected " diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 91b9e87b4f..debf772f34 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -110,7 +110,7 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): in_confined_directory = securesystemslib.util.file_in_confined_directories list_of_mirrors = [] - for junk, mirror_info in six.iteritems(mirrors_dict): + for dummy, mirror_info in six.iteritems(mirrors_dict): # Does mirror serve this file type at all? 
path = mirror_info.get(path_key) if path is None: @@ -157,7 +157,7 @@ def mirror_meta_download( for file_mirror in file_mirrors: try: temp_obj = download.download_file( - file_mirror, upper_length, fetcher, STRICT_REQUIRED_LENGTH=False + file_mirror, upper_length, fetcher, strict_required_length=False ) temp_obj.seek(0) From 1d2721df80aeee860f98b3722e633665a4902ab4 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 14:08:20 +0300 Subject: [PATCH 20/86] Fix pylint R1720: Unnecessary "else" after "raise" Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 56 ++++++++++++++++------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index 0516d5e5ed..ec574c3f7d 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -131,11 +131,10 @@ def download_file(url, required_length, fetcher, strict_required_length=True): ) break - else: - logger.debug( - "The average download speed has not dipped below the" - " minimum average download speed set in tuf.settings.py." - ) + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) # Does the total number of downloaded bytes match the required length? _check_downloaded_length( @@ -236,34 +235,31 @@ def _check_downloaded_length( if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: raise tuf.exceptions.SlowRetrievalError(average_download_speed) - else: - logger.debug( - "Good average download speed: " - + repr(average_download_speed) - + " bytes per second" - ) + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) raise tuf.exceptions.DownloadLengthMismatchError( required_length, total_downloaded ) - else: - # We specifically disabled strict checking of required length, but - # we will log a warning anyway. 
This is useful when we wish to - # download the Timestamp or Root metadata, for which we have no - # signed metadata; so, we must guess a reasonable required_length - # for it. - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) - - else: - logger.debug( - "Good average download speed: " - + repr(average_download_speed) - + " bytes per second" - ) + # We specifically disabled strict checking of required length, but + # we will log a warning anyway. This is useful when we wish to + # download the Timestamp or Root metadata, for which we have no + # signed metadata; so, we must guess a reasonable required_length + # for it. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) - logger.info( - "Downloaded " + str(total_downloaded) + " bytes out of an" - " upper limit of " + str(required_length) + " bytes." - ) + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of an" + " upper limit of " + str(required_length) + " bytes." + ) From aaedcfe56261a607a828489f492c09ede6e171af Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 14:11:18 +0300 Subject: [PATCH 21/86] Fix pylint W0703: broad-except pylint cannot figure out that we store the exceptions in a dictionary to raise them later so we disable the warning. This should be reviewed in the future still. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/mirrors.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index debf772f34..7177809d38 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -163,7 +163,10 @@ def mirror_meta_download( temp_obj.seek(0) yield temp_obj - except Exception as exception: + # pylint cannot figure out that we store the exceptions + # in a dictionary to raise them later so we disable + # the warning. This should be reviewed in the future still. + except Exception as exception: # pylint: disable=broad-except file_mirror_errors[file_mirror] = exception finally: @@ -192,7 +195,10 @@ def mirror_target_download( temp_obj.seek(0) yield temp_obj - except Exception as exception: + # pylint cannot figure out that we store the exceptions + # in a dictionary to raise them later so we disable + # the warning. This should be reviewed in the future still. + except Exception as exception: # pylint: disable=broad-except file_mirror_errors[file_mirror] = exception finally: From 2f0bbd04f636b77f43b07782881c638058d71e86 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 14:23:19 +0300 Subject: [PATCH 22/86] Remove use of future and six six and future are Python 2 compatibility modules which are no longer needed after the end of Python 2 support. Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 15 ++------------- tuf/client_rework/mirrors.py | 17 +++-------------- tuf/client_rework/requests_fetcher.py | 4 ++-- 3 files changed, 7 insertions(+), 29 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index ec574c3f7d..ac097edcf3 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -23,24 +23,13 @@ metadata of that file. 
""" - -# Help with Python 3 compatibility, where the print statement is a function, an -# implicit relative import is invalid, and the '/' operator performs true -# division. Example: print 'hello world' raises a 'SyntaxError' exception. -from __future__ import ( - absolute_import, - division, - print_function, - unicode_literals, -) - import logging import tempfile import timeit +from urllib import parse import securesystemslib import securesystemslib.util -import six import tuf import tuf.exceptions @@ -98,7 +87,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): # a url with their single-character equivalent. A back-slash may be # encoded as %5c in the url, which should also be replaced with a forward # slash. - url = six.moves.urllib.parse.unquote(url).replace("\\", "/") + url = parse.unquote(url).replace("\\", "/") logger.info("Downloading: " + repr(url)) # This is the temporary file that we will return to contain the contents of diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 7177809d38..311416dc75 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -22,22 +22,11 @@ of the file with respect to the base url. """ - -# Help with Python 3 compatibility, where the print statement is a function, an -# implicit relative import is invalid, and the '/' operator performs true -# division. Example: print 'hello world' raises a 'SyntaxError' exception. 
-from __future__ import ( - absolute_import, - division, - print_function, - unicode_literals, -) - import os from typing import BinaryIO, Dict, TextIO +from urllib import parse import securesystemslib -import six import tuf import tuf.client_rework.download as download @@ -110,7 +99,7 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): in_confined_directory = securesystemslib.util.file_in_confined_directories list_of_mirrors = [] - for dummy, mirror_info in six.iteritems(mirrors_dict): + for mirror_info in mirrors_dict.values(): # Does mirror serve this file type at all? path = mirror_info.get(path_key) if path is None: @@ -131,7 +120,7 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): # on the server side. Do *NOT* pass URLs with Unicode characters without # first encoding the URL as UTF-8. Needed a long-term solution with #61. # http://bugs.python.org/issue1712522 - file_path = six.moves.urllib.parse.quote(file_path) + file_path = parse.quote(file_path) url = os.path.join(mirror_info["url_prefix"], path, file_path) # The above os.path.join() result as well as input file_path may be diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py index 545a23feff..fadd022743 100644 --- a/tuf/client_rework/requests_fetcher.py +++ b/tuf/client_rework/requests_fetcher.py @@ -7,10 +7,10 @@ import logging import time +from urllib import parse # Imports import requests -import six import urllib3.exceptions import tuf.exceptions @@ -147,7 +147,7 @@ def _get_session(self, url): """ # Use a different requests.Session per schema+hostname combination, to # reuse connections while minimizing subtle security issues. 
- parsed_url = six.moves.urllib.parse.urlparse(url) + parsed_url = parse.urlparse(url) if not parsed_url.scheme or not parsed_url.hostname: raise tuf.exceptions.URLParsingError( From 546bb785f9461e10400a7f4c7e36368ebd3b5cc9 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 8 Apr 2021 14:45:24 +0300 Subject: [PATCH 23/86] Fix client imports Fix imports to be vendoring compatible. Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 20 +++++++++----------- tuf/client_rework/metadata_wrapper.py | 8 ++++---- tuf/client_rework/mirrors.py | 23 ++++++++++++----------- tuf/client_rework/requests_fetcher.py | 22 +++++++++++----------- tuf/client_rework/updater_rework.py | 2 +- 5 files changed, 37 insertions(+), 38 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index ac097edcf3..858355523f 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -28,12 +28,10 @@ import timeit from urllib import parse -import securesystemslib -import securesystemslib.util +from securesystemslib import formats as sslib_formats import tuf -import tuf.exceptions -import tuf.formats +from tuf import exceptions, formats # See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger(__name__) @@ -65,7 +63,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): A file object is created on disk to store the contents of 'url'. - tuf.exceptions.DownloadLengthMismatchError, if there was a + exceptions.DownloadLengthMismatchError, if there was a mismatch of observed vs expected lengths while downloading the file. securesystemslib.exceptions.FormatError, if any of the arguments are @@ -78,8 +76,8 @@ def download_file(url, required_length, fetcher, strict_required_length=True): """ # Do all of the arguments have the appropriate format? # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
- securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) + sslib_formats.URL_SCHEMA.check_match(url) + formats.LENGTH_SCHEMA.check_match(required_length) # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. @@ -183,7 +181,7 @@ def _check_downloaded_length( strict_required_length is True and total_downloaded is not equal required_length. - tuf.exceptions.SlowRetrievalError, if the total downloaded was + exceptions.SlowRetrievalError, if the total downloaded was done in less than the acceptable download speed (as set in tuf.settings.py). @@ -222,7 +220,7 @@ def _check_downloaded_length( ) if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) + raise exceptions.SlowRetrievalError(average_download_speed) logger.debug( "Good average download speed: " @@ -230,7 +228,7 @@ def _check_downloaded_length( + " bytes per second" ) - raise tuf.exceptions.DownloadLengthMismatchError( + raise exceptions.DownloadLengthMismatchError( required_length, total_downloaded ) @@ -240,7 +238,7 @@ def _check_downloaded_length( # signed metadata; so, we must guess a reasonable required_length # for it. 
if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) + raise exceptions.SlowRetrievalError(average_download_speed) logger.debug( "Good average download speed: " diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index 6f182dc336..18f0d6d9aa 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -9,7 +9,7 @@ from securesystemslib.keys import format_metadata_to_key -import tuf.exceptions +from tuf import exceptions, formats from tuf.api import metadata @@ -64,7 +64,7 @@ def verify(self, keys, threshold): verified += 1 if verified < threshold: - raise tuf.exceptions.InsufficientKeysError + raise exceptions.InsufficientKeysError def persist(self, filename): """ @@ -77,13 +77,13 @@ def expires(self, reference_time=None): TODO """ if reference_time is None: - expires_timestamp = tuf.formats.datetime_to_unix_timestamp( + expires_timestamp = formats.datetime_to_unix_timestamp( self._meta.signed.expires ) reference_time = int(time.time()) if expires_timestamp < reference_time: - raise tuf.exceptions.ExpiredMetadataError + raise exceptions.ExpiredMetadataError class RootWrapper(MetadataWrapper): diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 311416dc75..97962e9eb7 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -26,11 +26,12 @@ from typing import BinaryIO, Dict, TextIO from urllib import parse -import securesystemslib +from securesystemslib import exceptions as sslib_exceptions +from securesystemslib import formats as sslib_formats +from securesystemslib import util as sslib_util -import tuf -import tuf.client_rework.download as download -import tuf.formats +from tuf import exceptions, formats +from tuf.client_rework import download # The type of file to be downloaded from a repository. The # 'get_list_of_mirrors' function supports these file types. 
@@ -78,13 +79,13 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): """ # Checking if all the arguments have appropriate format. - tuf.formats.RELPATH_SCHEMA.check_match(file_path) - tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) - securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + formats.RELPATH_SCHEMA.check_match(file_path) + formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + sslib_formats.NAME_SCHEMA.check_match(file_type) # Verify 'file_type' is supported. if file_type not in _SUPPORTED_FILE_TYPES: - raise securesystemslib.exceptions.Error( + raise sslib_exceptions.Error( "Invalid file_type argument." " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) ) @@ -96,7 +97,7 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): # on a repository mirror when fetching target files. This field may be set # by the client when the repository mirror is added to the # 'tuf.client.updater.Updater' object. - in_confined_directory = securesystemslib.util.file_in_confined_directories + in_confined_directory = sslib_util.file_in_confined_directories list_of_mirrors = [] for mirror_info in mirrors_dict.values(): @@ -160,7 +161,7 @@ def mirror_meta_download( finally: if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError(file_mirror_errors) + raise exceptions.NoWorkingMirrorError(file_mirror_errors) def mirror_target_download( @@ -192,4 +193,4 @@ def mirror_target_download( finally: if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError(file_mirror_errors) + raise exceptions.NoWorkingMirrorError(file_mirror_errors) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py index fadd022743..ef18233024 100644 --- a/tuf/client_rework/requests_fetcher.py +++ b/tuf/client_rework/requests_fetcher.py @@ -13,8 +13,8 @@ import requests import urllib3.exceptions -import tuf.exceptions -import tuf.settings +import tuf +from tuf import exceptions, settings from tuf.client_rework.fetcher 
import FetcherInterface # Globals @@ -58,9 +58,9 @@ def fetch(self, url, required_length): bytes. Raises: - tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving + exceptions.SlowRetrievalError: A timeout occurs while receiving data. - tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + exceptions.FetcherHTTPError: An HTTP error code is received. Returns: A bytes iterator @@ -76,7 +76,7 @@ def fetch(self, url, required_length): # - connect timeout (max delay before first byte is received) # - read (gap) timeout (max delay between bytes received) response = session.get( - url, stream=True, timeout=tuf.settings.SOCKET_TIMEOUT + url, stream=True, timeout=settings.SOCKET_TIMEOUT ) # Check response status. try: @@ -84,7 +84,7 @@ def fetch(self, url, required_length): except requests.HTTPError as e: response.close() status = e.response.status_code - raise tuf.exceptions.FetcherHTTPError(str(e), status) + raise exceptions.FetcherHTTPError(str(e), status) # Define a generator function to be returned by fetch. This way the # caller of fetch can differentiate between connection and actual data @@ -99,11 +99,11 @@ def chunks(): # large file in one shot. Before beginning the round, sleep # (if set) for a short amount of time so that the CPU is not # hogged in the while loop. 
- if tuf.settings.SLEEP_BEFORE_ROUND: - time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + if settings.SLEEP_BEFORE_ROUND: + time.sleep(settings.SLEEP_BEFORE_ROUND) read_amount = min( - tuf.settings.CHUNK_SIZE, + settings.CHUNK_SIZE, required_length - bytes_received, ) @@ -134,7 +134,7 @@ def chunks(): break except urllib3.exceptions.ReadTimeoutError as e: - raise tuf.exceptions.SlowRetrievalError(str(e)) + raise exceptions.SlowRetrievalError(str(e)) finally: response.close() @@ -150,7 +150,7 @@ def _get_session(self, url): parsed_url = parse.urlparse(url) if not parsed_url.scheme or not parsed_url.hostname: - raise tuf.exceptions.URLParsingError( + raise exceptions.URLParsingError( "Could not get scheme and hostname from URL: " + url ) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index ec783fa4b2..078f17304e 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -18,7 +18,7 @@ from tuf import exceptions, settings from tuf.client.fetcher import FetcherInterface -from tuf.client_rework import download, mirrors, requests_fetcher +from tuf.client_rework import mirrors, requests_fetcher from .metadata_wrapper import ( RootWrapper, From 101ab3d62b80c4f95753d2faf0fbe650de6b0012 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 14 Apr 2021 13:32:03 +0300 Subject: [PATCH 24/86] Disable pylint W1201: logging-not-lazy Disable pylint's "Use lazy % formatting in logging functions" warning until a common logging approach is decided. See #1334. 
Signed-off-by: Teodora Sechkova --- tuf/api/pylintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/tuf/api/pylintrc b/tuf/api/pylintrc index 409a96149f..23cfce8aea 100644 --- a/tuf/api/pylintrc +++ b/tuf/api/pylintrc @@ -14,6 +14,7 @@ disable=fixme, too-few-public-methods, too-many-arguments, + logging-not-lazy, [BASIC] good-names=i,j,k,v,e,f,fn,fp,_type From a890a78ac7f724c06f939d00070e227866da1e18 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 28 Apr 2021 10:08:52 +0300 Subject: [PATCH 25/86] Add pylint disable for file object handling We return the file object so cannot use a context manager to close it. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index 858355523f..ec7b3e1ec0 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -90,7 +90,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): # This is the temporary file that we will return to contain the contents of # the downloaded file. - temp_file = tempfile.TemporaryFile() + temp_file = tempfile.TemporaryFile() # pylint: disable=consider-using-with average_download_speed = 0 number_of_bytes_received = 0 From b350746f4e885c547294aa5fa42c4a43055a67d5 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 15 Apr 2021 16:48:13 +0300 Subject: [PATCH 26/86] Remove mirror_*_download functions mirror_target_download and mirror_meta_download functions that return iterators are causing more troubles than good. Since there is a need to download and verify each file inside the mirrors loop (before continuing to the next mirror), feels like the correct component to do this is the Updater itself. This means slightly repetitive code inside the Updater class but with the benefit of better readability and less bug-prone implementation. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/mirrors.py | 68 +----- tuf/client_rework/updater_rework.py | 309 +++++++++++++++++++--------- 2 files changed, 215 insertions(+), 162 deletions(-) diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 97962e9eb7..83991e64e6 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -23,15 +23,13 @@ """ import os -from typing import BinaryIO, Dict, TextIO from urllib import parse from securesystemslib import exceptions as sslib_exceptions from securesystemslib import formats as sslib_formats from securesystemslib import util as sslib_util -from tuf import exceptions, formats -from tuf.client_rework import download +from tuf import formats # The type of file to be downloaded from a repository. The # 'get_list_of_mirrors' function supports these file types. @@ -130,67 +128,3 @@ def get_list_of_mirrors(file_type, file_path, mirrors_dict): list_of_mirrors.append(url.replace("\\", "/")) return list_of_mirrors - - -def mirror_meta_download( - filename: str, - upper_length: int, - mirrors_config: Dict, - fetcher: "FetcherInterface", -) -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = get_list_of_mirrors("meta", filename, mirrors_config) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, upper_length, fetcher, strict_required_length=False - ) - - temp_obj.seek(0) - yield temp_obj - - # pylint cannot figure out that we store the exceptions - # in a dictionary to raise them later so we disable - # the warning. This should be reviewed in the future still. 
- except Exception as exception: # pylint: disable=broad-except - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) - - -def mirror_target_download( - fileinfo: str, mirrors_config: Dict, fetcher: "FetcherInterface" -) -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - # full_filename = _get_full_name(filename) - file_mirrors = get_list_of_mirrors( - "target", fileinfo["filepath"], mirrors_config - ) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, fileinfo["fileinfo"]["length"], fetcher - ) - - temp_obj.seek(0) - yield temp_obj - - # pylint cannot figure out that we store the exceptions - # in a dictionary to raise them later so we disable - # the warning. This should be reviewed in the future still. - except Exception as exception: # pylint: disable=broad-except - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 078f17304e..1049989c89 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -18,7 +18,7 @@ from tuf import exceptions, settings from tuf.client.fetcher import FetcherInterface -from tuf.client_rework import mirrors, requests_fetcher +from tuf.client_rework import download, mirrors, requests_fetcher from .metadata_wrapper import ( RootWrapper, @@ -147,22 +147,41 @@ def download_target(self, target: Dict, destination_directory: str): This method performs the actual download of the specified target. The file is saved to the 'destination_directory' argument. """ - try: - for temp_obj in mirrors.mirror_target_download( - target, self._mirrors, self._fetcher - ): - self._verify_target_file(temp_obj, target) - # break? 
should we break after first successful download? + temp_obj = None + file_mirror_errors = {} + file_mirrors = mirrors.get_list_of_mirrors( + "target", target["filepath"], self._mirrors + ) - filepath = os.path.join( - destination_directory, target["filepath"] + for file_mirror in file_mirrors: + try: + temp_obj = download.download_file( + file_mirror, target["fileinfo"]["length"], self._fetcher ) - sslib_util.persist_temp_file(temp_obj, filepath) - # pylint: disable=try-except-raise - except Exception: - # TODO: do something with exceptions - raise + + temp_obj.seek(0) + self._verify_target_file(temp_obj, target) + break + + except Exception as exception: # pylint: disable=broad-except + # Store the exceptions until all mirrors are iterated. + # If an exception is raised from one mirror but a valid + # file is found in the next one, the first exception is ignored. + file_mirror_errors[file_mirror] = exception + + if temp_obj: + temp_obj.close() + temp_obj = None + + # If all mirrors are iterated but a file object is not successfully + # downloaded and verifies, raise the collected errors + if not temp_obj: + raise exceptions.NoWorkingMirrorError(file_mirror_errors) + + filepath = os.path.join(destination_directory, target["filepath"]) + sslib_util.persist_temp_file(temp_obj, filepath) + temp_obj.close() def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -204,6 +223,7 @@ def _load_root(self) -> None: """ # Load trusted root metadata + # TODO: this should happen much earlier, on Updater.__init__ self._metadata["root"] = RootWrapper.from_json_file( self._get_full_meta_name("root") ) @@ -213,97 +233,164 @@ def _load_root(self) -> None: # root metadata file. 
lower_bound = self._metadata["root"].version upper_bound = lower_bound + settings.MAX_NUMBER_ROOT_ROTATIONS + intermediate_root = None - verified_root = None for next_version in range(lower_bound, upper_bound): try: - mirror_download = mirrors.mirror_meta_download( + root_mirrors = mirrors.get_list_of_mirrors( + "meta", self._get_relative_meta_name("root", version=next_version), - settings.DEFAULT_ROOT_REQUIRED_LENGTH, self._mirrors, - self._fetcher, ) - for temp_obj in mirror_download: - try: - verified_root = self._verify_root(temp_obj) - # pylint: disable=try-except-raise - except Exception: - raise + # For each version of root iterate over the list of mirrors + # until an intermediate root is successfully downloaded and + # verified. + intermediate_root = self._root_mirrors_download(root_mirrors) + # Exit the loop when all mirrors have raised only 403 / 404 errors, + # which indicates that a bigger root version does not exist. except exceptions.NoWorkingMirrorError as exception: for mirror_error in exception.mirror_errors.values(): + # Otherwise, reraise the error, because it is not a simple + # HTTP error. if neither_403_nor_404(mirror_error): - temp_obj.close() + logger.info( + "Misc error for root version " + str(next_version) + ) raise + logger.debug("HTTP error for root version " + str(next_version)) + # If we are here, then we ran into only 403 / 404 errors, which + # are good reasons to suspect that the next root metadata file + # does not exist. break - # Check for a freeze attack. The latest known time MUST be lower - # than the expiration timestamp in the trusted root metadata file - try: - verified_root.expires() - except exceptions.ExpiredMetadataError: - temp_obj.close() # pylint: disable=undefined-loop-variable - - # 1.9. If the timestamp and / or snapshot keys have been rotated, - # then delete the trusted timestamp and snapshot metadata files. 
- if self._metadata["root"].keys("timestamp") != verified_root.keys( - "timestamp" - ): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name("timestamp")) - self._metadata["timestamp"] = {} + # Continue only if a newer root version is found + if intermediate_root is not None: + # Check for a freeze attack. The latest known time MUST be lower + # than the expiration timestamp in the trusted root metadata file + # TODO define which exceptions are part of the public API + intermediate_root.expires() + + # 1.9. If the timestamp and / or snapshot keys have been rotated, + # then delete the trusted timestamp and snapshot metadata files. + if self._metadata["root"].keys( + "timestamp" + ) != intermediate_root.keys("timestamp"): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name("timestamp")) + self._metadata["timestamp"] = {} + + if self._metadata["root"].keys( + "snapshot" + ) != intermediate_root.keys("snapshot"): + # FIXME: use abstract storage + os.remove(self._get_full_meta_name("snapshot")) + self._metadata["snapshot"] = {} + + # Set the trusted root metadata file to the new root + # metadata file + self._metadata["root"] = intermediate_root + # Persist root metadata. The client MUST write the file to + # non-volatile storage as FILENAME.EXT (e.g. root.json). + self._metadata["root"].persist(self._get_full_meta_name("root")) + + # 1.10. Set whether consistent snapshots are used as per + # the trusted root metadata file + self._consistent_snapshot = self._metadata[ + "root" + ].signed.consistent_snapshot + + def _root_mirrors_download(self, root_mirrors: Dict) -> "RootWrapper": + """Iterate over the list of "root_mirrors" until an intermediate + root is successfully downloaded and verified. 
+ Raise "NoWorkingMirrorError" if a root file cannot be downloaded or + verified from any mirror""" + + file_mirror_errors = {} + temp_obj = None + intermediate_root = None + + for root_mirror in root_mirrors: + try: + temp_obj = download.download_file( + root_mirror, + settings.DEFAULT_ROOT_REQUIRED_LENGTH, + self._fetcher, + strict_required_length=False, + ) - if self._metadata["root"].keys("snapshot") != verified_root.keys( - "snapshot" - ): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name("snapshot")) - self._metadata["snapshot"] = {} + temp_obj.seek(0) + intermediate_root = self._verify_root(temp_obj) + # When we reach this point, a root file has been successfully + # downloaded and verified so we can exit the loop. + break - self._metadata["root"] = verified_root - # Persist root metadata. The client MUST write the file to non-volatile - # storage as FILENAME.EXT (e.g. root.json). - self._metadata["root"].persist(self._get_full_meta_name("root")) + # pylint cannot figure out that we store the exceptions + # in a dictionary to raise them later so we disable + # the warning. This should be reviewed in the future still. + except Exception as exception: # pylint: disable=broad-except + # Store the exceptions until all mirrors are iterated. + # If an exception is raised from one mirror but a valid + # file is found in the next one, the first exception is ignored. + file_mirror_errors[root_mirror] = exception + + finally: + if temp_obj: + temp_obj.close() + temp_obj = None - # 1.10. 
Set whether consistent snapshots are used as per - # the trusted root metadata file - self._consistent_snapshot = self._metadata[ - "root" - ].signed.consistent_snapshot + if not intermediate_root: + # If all mirrors are tried but a valid root file is not found, + # then raise an exception with the stored errors + raise exceptions.NoWorkingMirrorError(file_mirror_errors) - temp_obj.close() # pylint: disable=undefined-loop-variable + return intermediate_root def _load_timestamp(self) -> None: """ TODO """ # TODO Check if timestamp exists locally - for temp_obj in mirrors.mirror_meta_download( - "timestamp.json", - settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._mirrors, - self._fetcher, - ): + file_mirrors = mirrors.get_list_of_mirrors( + "meta", "timestamp.json", self._mirrors + ) + + file_mirror_errors = {} + verified_timestamp = None + for file_mirror in file_mirrors: try: - verified_tampstamp = self._verify_timestamp(temp_obj) - # break? should we break after first successful download? - except Exception: - # TODO: do something with exceptions - temp_obj.close() - raise + temp_obj = download.download_file( + file_mirror, + settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, + self._fetcher, + strict_required_length=False, + ) - self._metadata["timestamp"] = verified_tampstamp + temp_obj.seek(0) + verified_timestamp = self._verify_timestamp(temp_obj) + break + + except Exception as exception: # pylint: disable=broad-except + file_mirror_errors[file_mirror] = exception + + finally: + if temp_obj: + temp_obj.close() + temp_obj = None + + if not verified_timestamp: + raise exceptions.NoWorkingMirrorError(file_mirror_errors) + + self._metadata["timestamp"] = verified_timestamp # Persist root metadata. The client MUST write the file to # non-volatile storage as FILENAME.EXT (e.g. root.json). 
self._metadata["timestamp"].persist( self._get_full_meta_name("timestamp.json") ) - temp_obj.close() # pylint: disable=undefined-loop-variable - def _load_snapshot(self) -> None: """ TODO @@ -319,19 +406,37 @@ def _load_snapshot(self) -> None: # else: # version = None - # Check if exists locally - # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in mirrors.mirror_meta_download( - "snapshot.json", length, self._mirrors, self._fetcher - ): + # TODO: Check if exists locally + file_mirrors = mirrors.get_list_of_mirrors( + "meta", "snapshot.json", self._mirrors + ) + + file_mirror_errors = {} + verified_snapshot = False + for file_mirror in file_mirrors: try: + temp_obj = download.download_file( + file_mirror, + length, + self._fetcher, + strict_required_length=False, + ) + + temp_obj.seek(0) verified_snapshot = self._verify_snapshot(temp_obj) - # break? should we break after first successful download? - except Exception: - # TODO: do something with exceptions - temp_obj.close() - raise + break + + except Exception as exception: # pylint: disable=broad-except + file_mirror_errors[file_mirror] = exception + + finally: + if temp_obj: + temp_obj.close() + temp_obj = None + + if not verified_snapshot: + raise exceptions.NoWorkingMirrorError(file_mirror_errors) self._metadata["snapshot"] = verified_snapshot # Persist root metadata. 
The client MUST write the file to @@ -340,8 +445,6 @@ def _load_snapshot(self) -> None: self._get_full_meta_name("snapshot.json") ) - temp_obj.close() # pylint: disable=undefined-loop-variable - def _load_targets(self, targets_role: str, parent_role: str) -> None: """ TODO @@ -357,22 +460,40 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # else: # version = None - # Check if exists locally - # self.loadLocal('snapshot', targetsVerifier) + # TODO: Check if exists locally - for temp_obj in mirrors.mirror_meta_download( - targets_role + ".json", length, self._mirrors, self._fetcher - ): + file_mirrors = mirrors.get_list_of_mirrors( + "meta", f"{targets_role}.json", self._mirrors + ) + file_mirror_errors = {} + verified_targets = False + for file_mirror in file_mirrors: try: + temp_obj = download.download_file( + file_mirror, + length, + self._fetcher, + strict_required_length=False, + ) + + temp_obj.seek(0) verified_targets = self._verify_targets( temp_obj, targets_role, parent_role ) - # break? should we break after first successful download? - except Exception: - # TODO: do something with exceptions - temp_obj.close() - raise + break + + except Exception as exception: # pylint: disable=broad-except + file_mirror_errors[file_mirror] = exception + + finally: + if temp_obj: + temp_obj.close() + temp_obj = None + + if not verified_targets: + raise exceptions.NoWorkingMirrorError(file_mirror_errors) + self._metadata[targets_role] = verified_targets # Persist root metadata. The client MUST write the file to # non-volatile storage as FILENAME.EXT (e.g. root.json). 
@@ -380,8 +501,6 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: self._get_full_meta_name(targets_role, extension=".json") ) - temp_obj.close() # pylint: disable=undefined-loop-variable - def _verify_root(self, temp_obj: TextIO) -> RootWrapper: """ TODO From 482ea1cc86f18f3624939b1dc0883916eb47739e Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 15 Apr 2021 16:59:27 +0300 Subject: [PATCH 27/86] Remove _get_relative_meta_name Current implementation does not bring much benefit and can be replaced with a simple f-string. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 1049989c89..a6a171bbcb 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -202,20 +202,6 @@ def _get_full_meta_name( filename, ) - @staticmethod - def _get_relative_meta_name( - role: str, extension: str = ".json", version: int = None - ) -> str: - """ - Helper method returning full metadata file path given the role name - and file extension. - """ - if version is None: - filename = role + extension - else: - filename = str(version) + "." + role + extension - return filename - def _load_root(self) -> None: """ If metadata file for 'root' role does not exist locally, download it @@ -239,7 +225,7 @@ def _load_root(self) -> None: try: root_mirrors = mirrors.get_list_of_mirrors( "meta", - self._get_relative_meta_name("root", version=next_version), + f"{next_version}.root.json", self._mirrors, ) From e885ae4a47e8c00c40a049a6bccafed3ce016535 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 15 Apr 2021 17:31:30 +0300 Subject: [PATCH 28/86] Replace file objects used as function arguments By replacing file objects passed as function arguments with the read file content, we simplify temporary file objects life cycle management. 
Temporary files are handled in a single function. This is done for metadata files, which are fully read into memory right after download, anyway. Same is not true for target files which preferably should be treated in chunks so targets download and verification still deal with file objects. _check_hashes is split in two functions, one dealing correctly with file objects and one using directly file content. Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_wrapper.py | 3 +- tuf/client_rework/updater_rework.py | 76 +++++++++++++-------------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index 18f0d6d9aa..b1dd2d1d58 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -22,9 +22,8 @@ def __init__(self, meta): self._meta = meta @classmethod - def from_json_object(cls, tmp_file): + def from_json_object(cls, raw_data): """Loads JSON-formatted TUF metadata from a file object.""" - raw_data = tmp_file.read() # Use local scope import to avoid circular import errors # pylint: disable=import-outside-toplevel from tuf.api.serialization.json import JSONDeserializer diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index a6a171bbcb..435a21485f 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -10,7 +10,7 @@ import fnmatch import logging import os -from typing import BinaryIO, Dict, Optional, TextIO +from typing import Dict, Optional from securesystemslib import exceptions as sslib_exceptions from securesystemslib import hash as sslib_hash @@ -159,9 +159,9 @@ def download_target(self, target: Dict, destination_directory: str): temp_obj = download.download_file( file_mirror, target["fileinfo"]["length"], self._fetcher ) - + _check_file_length(temp_obj, target["fileinfo"]["length"]) temp_obj.seek(0) - self._verify_target_file(temp_obj, target) + 
_check_hashes_obj(temp_obj, target["fileinfo"]["hashes"]) break except Exception as exception: # pylint: disable=broad-except @@ -308,7 +308,7 @@ def _root_mirrors_download(self, root_mirrors: Dict) -> "RootWrapper": ) temp_obj.seek(0) - intermediate_root = self._verify_root(temp_obj) + intermediate_root = self._verify_root(temp_obj.read()) # When we reach this point, a root file has been successfully # downloaded and verified so we can exit the loop. break @@ -356,7 +356,7 @@ def _load_timestamp(self) -> None: ) temp_obj.seek(0) - verified_timestamp = self._verify_timestamp(temp_obj) + verified_timestamp = self._verify_timestamp(temp_obj.read()) break except Exception as exception: # pylint: disable=broad-except @@ -410,7 +410,7 @@ def _load_snapshot(self) -> None: ) temp_obj.seek(0) - verified_snapshot = self._verify_snapshot(temp_obj) + verified_snapshot = self._verify_snapshot(temp_obj.read()) break except Exception as exception: # pylint: disable=broad-except @@ -465,7 +465,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: temp_obj.seek(0) verified_targets = self._verify_targets( - temp_obj, targets_role, parent_role + temp_obj.read(), targets_role, parent_role ) break @@ -487,12 +487,12 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: self._get_full_meta_name(targets_role, extension=".json") ) - def _verify_root(self, temp_obj: TextIO) -> RootWrapper: + def _verify_root(self, file_content: bytes) -> RootWrapper: """ TODO """ - intermediate_root = RootWrapper.from_json_object(temp_obj) + intermediate_root = RootWrapper.from_json_object(file_content) # Check for an arbitrary software attack trusted_root = self._metadata["root"] @@ -505,7 +505,6 @@ def _verify_root(self, temp_obj: TextIO) -> RootWrapper: # Check for a rollback attack. 
if intermediate_root.version < trusted_root.version: - temp_obj.close() raise exceptions.ReplayedMetadataError( "root", intermediate_root.version(), trusted_root.version() ) @@ -514,11 +513,11 @@ def _verify_root(self, temp_obj: TextIO) -> RootWrapper: return intermediate_root - def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: + def _verify_timestamp(self, file_content: bytes) -> TimestampWrapper: """ TODO """ - intermediate_timestamp = TimestampWrapper.from_json_object(temp_obj) + intermediate_timestamp = TimestampWrapper.from_json_object(file_content) # Check for an arbitrary software attack trusted_root = self._metadata["root"] @@ -532,7 +531,6 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: intermediate_timestamp.signed.version <= self._metadata["timestamp"].version ): - temp_obj.close() raise exceptions.ReplayedMetadataError( "root", intermediate_timestamp.version(), @@ -544,7 +542,6 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: intermediate_timestamp.snapshot.version <= self._metadata["timestamp"].snapshot["version"] ): - temp_obj.close() raise exceptions.ReplayedMetadataError( "root", intermediate_timestamp.snapshot.version(), @@ -555,7 +552,7 @@ def _verify_timestamp(self, temp_obj: TextIO) -> TimestampWrapper: return intermediate_timestamp - def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: + def _verify_snapshot(self, file_content: bytes) -> SnapshotWrapper: """ TODO """ @@ -563,16 +560,15 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: # Check against timestamp metadata if self._metadata["timestamp"].snapshot.get("hash"): _check_hashes( - temp_obj, self._metadata["timestamp"].snapshot.get("hash") + file_content, self._metadata["timestamp"].snapshot.get("hash") ) - intermediate_snapshot = SnapshotWrapper.from_json_object(temp_obj) + intermediate_snapshot = SnapshotWrapper.from_json_object(file_content) if ( intermediate_snapshot.version != 
self._metadata["timestamp"].snapshot["version"] ): - temp_obj.close() raise exceptions.BadVersionNumberError # Check for an arbitrary software attack @@ -588,7 +584,6 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: target_role["version"] != self._metadata["snapshot"].meta[target_role]["version"] ): - temp_obj.close() raise exceptions.BadVersionNumberError intermediate_snapshot.expires() @@ -596,7 +591,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: return intermediate_snapshot def _verify_targets( - self, temp_obj: TextIO, filename: str, parent_role: str + self, file_content: bytes, filename: str, parent_role: str ) -> TargetsWrapper: """ TODO @@ -605,15 +600,14 @@ def _verify_targets( # Check against timestamp metadata if self._metadata["snapshot"].role(filename).get("hash"): _check_hashes( - temp_obj, self._metadata["snapshot"].targets.get("hash") + file_content, self._metadata["snapshot"].targets.get("hash") ) - intermediate_targets = TargetsWrapper.from_json_object(temp_obj) + intermediate_targets = TargetsWrapper.from_json_object(file_content) if ( intermediate_targets.version != self._metadata["snapshot"].role(filename)["version"] ): - temp_obj.close() raise exceptions.BadVersionNumberError # Check for an arbitrary software attack @@ -627,15 +621,6 @@ def _verify_targets( return intermediate_targets - @staticmethod - def _verify_target_file(temp_obj: BinaryIO, targetinfo: Dict) -> None: - """ - TODO - """ - - _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) - def _preorder_depth_first_walk(self, target_filepath) -> Dict: """ TODO @@ -864,7 +849,25 @@ def _check_file_length(file_object, trusted_file_length): ) -def _check_hashes(file_object, trusted_hashes): +def _check_hashes_obj(file_object, trusted_hashes): + """ + TODO + """ + for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = sslib_hash.digest_fileobject(file_object, 
algorithm) + + computed_hash = digest_object.hexdigest() + + # Raise an exception if any of the hashes are incorrect. + if trusted_hash != computed_hash: + raise sslib_exceptions.BadHashError(trusted_hash, computed_hash) + + logger.info( + "The file's " + algorithm + " hash is" " correct: " + trusted_hash + ) + + +def _check_hashes(file_content, trusted_hashes): """ TODO """ @@ -872,11 +875,8 @@ def _check_hashes(file_object, trusted_hashes): # return. for algorithm, trusted_hash in trusted_hashes.items(): digest_object = sslib_hash.digest(algorithm) - # Ensure we read from the beginning of the file object - # TODO: should we store file position (before the loop) and reset - # after we seek about? - file_object.seek(0) - digest_object.update(file_object.read()) + + digest_object.update(file_content) computed_hash = digest_object.hexdigest() # Raise an exception if any of the hashes are incorrect. From dd110988b257a1be7f30f14ceb9794d6466ef0e6 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 27 Apr 2021 14:07:35 +0300 Subject: [PATCH 29/86] Replace sslib exceptions Replace sslib exceptions raised in Updater with the corresponding ones defined in tuf. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 435a21485f..c6872c6478 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -860,7 +860,7 @@ def _check_hashes_obj(file_object, trusted_hashes): # Raise an exception if any of the hashes are incorrect. 
if trusted_hash != computed_hash: - raise sslib_exceptions.BadHashError(trusted_hash, computed_hash) + raise exceptions.BadHashError(trusted_hash, computed_hash) logger.info( "The file's " + algorithm + " hash is" " correct: " + trusted_hash @@ -881,7 +881,7 @@ def _check_hashes(file_content, trusted_hashes): # Raise an exception if any of the hashes are incorrect. if trusted_hash != computed_hash: - raise sslib_exceptions.BadHashError(trusted_hash, computed_hash) + raise exceptions.BadHashError(trusted_hash, computed_hash) logger.info( "The file's " + algorithm + " hash is" " correct: " + trusted_hash From 80ff532be1e4134e05a11287d31fc88ddf78edf3 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 29 Apr 2021 09:28:06 +0300 Subject: [PATCH 30/86] Download metadata from a single mirror Keep the current API and mirrors configuration but use only the first mirror from the list for metadata download. Target files download remains unchanged. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 208 ++++++++++------------------ 1 file changed, 77 insertions(+), 131 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index c6872c6478..8a643204a3 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -223,37 +223,42 @@ def _load_root(self) -> None: for next_version in range(lower_bound, upper_bound): try: + # Get the list of mirrors but we'll use only the first one root_mirrors = mirrors.get_list_of_mirrors( "meta", f"{next_version}.root.json", self._mirrors, ) + temp_obj = None # For each version of root iterate over the list of mirrors # until an intermediate root is successfully downloaded and # verified. - intermediate_root = self._root_mirrors_download(root_mirrors) - - # Exit the loop when all mirrors have raised only 403 / 404 errors, - # which indicates that a bigger root version does not exist. 
- except exceptions.NoWorkingMirrorError as exception: - for mirror_error in exception.mirror_errors.values(): - # Otherwise, reraise the error, because it is not a simple - # HTTP error. - if neither_403_nor_404(mirror_error): - logger.info( - "Misc error for root version " + str(next_version) - ) - raise + temp_obj = download.download_file( + root_mirrors[0], + settings.DEFAULT_ROOT_REQUIRED_LENGTH, + self._fetcher, + strict_required_length=False, + ) - logger.debug("HTTP error for root version " + str(next_version)) - # If we are here, then we ran into only 403 / 404 errors, which - # are good reasons to suspect that the next root metadata file - # does not exist. + temp_obj.seek(0) + intermediate_root = self._verify_root(temp_obj.read()) + # TODO: persist should happen here for each intermediate + # root according to the spec + + except exceptions.FetcherHTTPError as exception: + if exception.status_code not in {403, 404}: + raise + # Stop looking for a bigger version if "File not found" + # error is received break - # Continue only if a newer root version is found - if intermediate_root is not None: + finally: + if temp_obj: + temp_obj.close() + temp_obj = None + + if intermediate_root: # Check for a freeze attack. The latest known time MUST be lower # than the expiration timestamp in the trusted root metadata file # TODO define which exceptions are part of the public API @@ -288,52 +293,6 @@ def _load_root(self) -> None: "root" ].signed.consistent_snapshot - def _root_mirrors_download(self, root_mirrors: Dict) -> "RootWrapper": - """Iterate over the list of "root_mirrors" until an intermediate - root is successfully downloaded and verified. 
- Raise "NoWorkingMirrorError" if a root file cannot be downloaded or - verified from any mirror""" - - file_mirror_errors = {} - temp_obj = None - intermediate_root = None - - for root_mirror in root_mirrors: - try: - temp_obj = download.download_file( - root_mirror, - settings.DEFAULT_ROOT_REQUIRED_LENGTH, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - intermediate_root = self._verify_root(temp_obj.read()) - # When we reach this point, a root file has been successfully - # downloaded and verified so we can exit the loop. - break - - # pylint cannot figure out that we store the exceptions - # in a dictionary to raise them later so we disable - # the warning. This should be reviewed in the future still. - except Exception as exception: # pylint: disable=broad-except - # Store the exceptions until all mirrors are iterated. - # If an exception is raised from one mirror but a valid - # file is found in the next one, the first exception is ignored. - file_mirror_errors[root_mirror] = exception - - finally: - if temp_obj: - temp_obj.close() - temp_obj = None - - if not intermediate_root: - # If all mirrors are tried but a valid root file is not found, - # then raise an exception with the stored errors - raise exceptions.NoWorkingMirrorError(file_mirror_errors) - - return intermediate_root - def _load_timestamp(self) -> None: """ TODO @@ -344,31 +303,27 @@ def _load_timestamp(self) -> None: "meta", "timestamp.json", self._mirrors ) - file_mirror_errors = {} verified_timestamp = None - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - verified_timestamp = self._verify_timestamp(temp_obj.read()) - break + temp_obj = None + try: + temp_obj = download.download_file( + file_mirrors[0], + settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, + self._fetcher, + strict_required_length=False, + ) - 
except Exception as exception: # pylint: disable=broad-except - file_mirror_errors[file_mirror] = exception + temp_obj.seek(0) + verified_timestamp = self._verify_timestamp(temp_obj.read()) - finally: - if temp_obj: - temp_obj.close() - temp_obj = None + except Exception as e: + # TODO: do we reraise a NoWorkingMirrorError or just + # let exceptions propagate? + raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e - if not verified_timestamp: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) + finally: + if temp_obj: + temp_obj.close() self._metadata["timestamp"] = verified_timestamp # Persist root metadata. The client MUST write the file to @@ -393,36 +348,31 @@ def _load_snapshot(self) -> None: # version = None # TODO: Check if exists locally - file_mirrors = mirrors.get_list_of_mirrors( "meta", "snapshot.json", self._mirrors ) - file_mirror_errors = {} verified_snapshot = False - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - length, - self._fetcher, - strict_required_length=False, - ) + temp_obj = None + try: + temp_obj = download.download_file( + file_mirrors[0], + length, + self._fetcher, + strict_required_length=False, + ) - temp_obj.seek(0) - verified_snapshot = self._verify_snapshot(temp_obj.read()) - break + temp_obj.seek(0) + verified_snapshot = self._verify_snapshot(temp_obj.read()) - except Exception as exception: # pylint: disable=broad-except - file_mirror_errors[file_mirror] = exception + except Exception as e: + # TODO: do we reraise a NoWorkingMirrorError or just + # let exceptions propagate? + raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e - finally: - if temp_obj: - temp_obj.close() - temp_obj = None - - if not verified_snapshot: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) + finally: + if temp_obj: + temp_obj.close() self._metadata["snapshot"] = verified_snapshot # Persist root metadata. 
The client MUST write the file to @@ -452,33 +402,29 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: "meta", f"{targets_role}.json", self._mirrors ) - file_mirror_errors = {} verified_targets = False - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - length, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - verified_targets = self._verify_targets( - temp_obj.read(), targets_role, parent_role - ) - break + temp_obj = None + try: + temp_obj = download.download_file( + file_mirrors[0], + length, + self._fetcher, + strict_required_length=False, + ) - except Exception as exception: # pylint: disable=broad-except - file_mirror_errors[file_mirror] = exception + temp_obj.seek(0) + verified_targets = self._verify_targets( + temp_obj.read(), targets_role, parent_role + ) - finally: - if temp_obj: - temp_obj.close() - temp_obj = None + except Exception as e: + # TODO: do we reraise a NoWorkingMirrorError or just + # let exceptions propagate? + raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e - if not verified_targets: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) + finally: + if temp_obj: + temp_obj.close() self._metadata[targets_role] = verified_targets # Persist root metadata. The client MUST write the file to From 576d055cd402a5297424563a77b0a5a96304275b Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 29 Apr 2021 12:07:25 +0300 Subject: [PATCH 31/86] Drop mirrors support Updater now uses only a single url for metadata download. Target files download use either a default url or an optional one for each file passed by the caller. 
Signed-off-by: Teodora Sechkova --- tests/test_updater_rework.py | 9 +- tuf/client_rework/mirrors.py | 130 -------------------------- tuf/client_rework/updater_rework.py | 136 +++++++++++++--------------- 3 files changed, 66 insertions(+), 209 deletions(-) delete mode 100644 tuf/client_rework/mirrors.py diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py index bc6ce3a3f1..162fa5b1f9 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_rework.py @@ -123,14 +123,13 @@ def setUp(self): # directory copied from the original repository files. tuf.settings.repositories_directory = self.client_directory - self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, - 'metadata_path': 'metadata', - 'targets_path': 'targets'}} - + metadata_url = os.path.join(url_prefix, 'metadata/') + targets_url = os.path.join(url_prefix, 'targets/') # Creating a repository instance. The test cases will use this client # updater to refresh metadata, fetch target files, etc. self.repository_updater = updater.Updater(self.repository_name, - self.repository_mirrors) + metadata_url, + targets_url) # Metadata role keys are needed by the test cases to make changes to the # repository (e.g., adding a new target file to 'targets.json' and then diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py deleted file mode 100644 index 83991e64e6..0000000000 --- a/tuf/client_rework/mirrors.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 - 2017, New York University and the TUF contributors -# SPDX-License-Identifier: MIT OR Apache-2.0 - -""" - - mirrors.py - - - Konstantin Andrianov. - Derived from original mirrors.py written by Geremy Condra. - - - March 12, 2012. - - - See LICENSE-MIT OR LICENSE for licensing information. - - - Extract a list of mirror urls corresponding to the file type and the location - of the file with respect to the base url. 
-""" - -import os -from urllib import parse - -from securesystemslib import exceptions as sslib_exceptions -from securesystemslib import formats as sslib_formats -from securesystemslib import util as sslib_util - -from tuf import formats - -# The type of file to be downloaded from a repository. The -# 'get_list_of_mirrors' function supports these file types. -_SUPPORTED_FILE_TYPES = ["meta", "target"] - - -def get_list_of_mirrors(file_type, file_path, mirrors_dict): - """ - - Get a list of mirror urls from a mirrors dictionary, provided the type - and the path of the file with respect to the base url. - - - file_type: - Type of data needed for download, must correspond to one of the strings - in the list ['meta', 'target']. 'meta' for metadata file type or - 'target' for target file type. It should correspond to - NAME_SCHEMA format. - - file_path: - A relative path to the file that corresponds to RELPATH_SCHEMA format. - Ex: 'http://url_prefix/targets_path/file_path' - - mirrors_dict: - A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where - keys are strings and values are MIRROR_SCHEMA. An example format - of MIRROR_SCHEMA: - - {'url_prefix': 'http://localhost:8001', - 'metadata_path': 'metadata/', - 'targets_path': 'targets/', - 'confined_target_dirs': ['targets/snapshot1/', ...], - 'custom': {...}} - - The 'custom' field is optional. - - - securesystemslib.exceptions.Error, on unsupported 'file_type'. - - securesystemslib.exceptions.FormatError, on bad argument. - - - List of mirror urls corresponding to the file_type and file_path. If no - match is found, empty list is returned. - """ - - # Checking if all the arguments have appropriate format. - formats.RELPATH_SCHEMA.check_match(file_path) - formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) - sslib_formats.NAME_SCHEMA.check_match(file_type) - - # Verify 'file_type' is supported. - if file_type not in _SUPPORTED_FILE_TYPES: - raise sslib_exceptions.Error( - "Invalid file_type argument." 
- " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) - ) - path_key = "metadata_path" if file_type == "meta" else "targets_path" - - # Reference to 'securesystemslib.util.file_in_confined_directories()' - # (improve readability). This function checks whether a mirror should - # serve a file to the client. A client may be confined to certain paths - # on a repository mirror when fetching target files. This field may be set - # by the client when the repository mirror is added to the - # 'tuf.client.updater.Updater' object. - in_confined_directory = sslib_util.file_in_confined_directories - - list_of_mirrors = [] - for mirror_info in mirrors_dict.values(): - # Does mirror serve this file type at all? - path = mirror_info.get(path_key) - if path is None: - continue - - # for targets, ensure directory confinement - if path_key == "targets_path": - full_filepath = os.path.join(path, file_path) - confined_target_dirs = mirror_info.get("confined_target_dirs") - # confined_target_dirs is an optional field - if confined_target_dirs and not in_confined_directory( - full_filepath, confined_target_dirs - ): - continue - - # urllib.quote(string) replaces special characters in string using - # the %xx escape. This is done to avoid parsing issues of the URL - # on the server side. Do *NOT* pass URLs with Unicode characters without - # first encoding the URL as UTF-8. Needed a long-term solution with #61. - # http://bugs.python.org/issue1712522 - file_path = parse.quote(file_path) - url = os.path.join(mirror_info["url_prefix"], path, file_path) - - # The above os.path.join() result as well as input file_path may be - # invalid on windows (might contain both separator types), see #1077. - # Make sure the URL doesn't contain backward slashes on Windows. 
- list_of_mirrors.append(url.replace("\\", "/")) - - return list_of_mirrors diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 8a643204a3..9fbae6edc1 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -11,6 +11,7 @@ import logging import os from typing import Dict, Optional +from urllib import parse from securesystemslib import exceptions as sslib_exceptions from securesystemslib import hash as sslib_hash @@ -18,7 +19,7 @@ from tuf import exceptions, settings from tuf.client.fetcher import FetcherInterface -from tuf.client_rework import download, mirrors, requests_fetcher +from tuf.client_rework import download, requests_fetcher from .metadata_wrapper import ( RootWrapper, @@ -35,27 +36,26 @@ class Updater: """ Provides a class that can download target files securely. - Attributes: - metadata: - - repository_name: - - mirrors: - - fetcher: - - consistent_snapshot: + TODO """ def __init__( self, repository_name: str, - repository_mirrors: Dict, + metadata_url: str, + default_target_url: Optional[str] = None, fetcher: Optional[FetcherInterface] = None, ): self._repository_name = repository_name - self._mirrors = repository_mirrors + self._metadata_url = metadata_url + # Should we accept metadata url as a default for targets or + # targets_url should be provided either in this constructor + # or as a download_target parameter? + if default_target_url is None: + self._default_target_url = metadata_url + else: + self._default_target_url = default_target_url self._consistent_snapshot = False self._metadata = {} @@ -142,44 +142,38 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: return updated_targets - def download_target(self, target: Dict, destination_directory: str): + def download_target( + self, + targetinfo: Dict, + destination_directory: str, + target_url: Optional[str] = None, + ): """ This method performs the actual download of the specified target. 
The file is saved to the 'destination_directory' argument. """ + if target_url is None: + target_url = self._default_target_url - temp_obj = None - file_mirror_errors = {} - file_mirrors = mirrors.get_list_of_mirrors( - "target", target["filepath"], self._mirrors - ) + full_url = _build_full_url(target_url, targetinfo["filepath"]) - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, target["fileinfo"]["length"], self._fetcher - ) - _check_file_length(temp_obj, target["fileinfo"]["length"]) - temp_obj.seek(0) - _check_hashes_obj(temp_obj, target["fileinfo"]["hashes"]) - break - - except Exception as exception: # pylint: disable=broad-except - # Store the exceptions until all mirrors are iterated. - # If an exception is raised from one mirror but a valid - # file is found in the next one, the first exception is ignored. - file_mirror_errors[file_mirror] = exception - - if temp_obj: - temp_obj.close() - temp_obj = None + temp_obj = None + try: + temp_obj = download.download_file( + full_url, targetinfo["fileinfo"]["length"], self._fetcher + ) + _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) + temp_obj.seek(0) + _check_hashes_obj(temp_obj, targetinfo["fileinfo"]["hashes"]) - # If all mirrors are iterated but a file object is not successfully - # downloaded and verifies, raise the collected errors - if not temp_obj: - raise exceptions.NoWorkingMirrorError(file_mirror_errors) + except Exception as e: + if temp_obj: + temp_obj.close() + # TODO: do we reraise a NoWorkingMirrorError or just + # let exceptions propagate? 
+ raise exceptions.NoWorkingMirrorError({full_url: e}) from e - filepath = os.path.join(destination_directory, target["filepath"]) + filepath = os.path.join(destination_directory, targetinfo["filepath"]) sslib_util.persist_temp_file(temp_obj, filepath) temp_obj.close() @@ -223,19 +217,15 @@ def _load_root(self) -> None: for next_version in range(lower_bound, upper_bound): try: - # Get the list of mirrors but we'll use only the first one - root_mirrors = mirrors.get_list_of_mirrors( - "meta", - f"{next_version}.root.json", - self._mirrors, + root_url = _build_full_url( + self._metadata_url, f"{next_version}.root.json" ) - temp_obj = None # For each version of root iterate over the list of mirrors # until an intermediate root is successfully downloaded and # verified. temp_obj = download.download_file( - root_mirrors[0], + root_url, settings.DEFAULT_ROOT_REQUIRED_LENGTH, self._fetcher, strict_required_length=False, @@ -298,16 +288,12 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - - file_mirrors = mirrors.get_list_of_mirrors( - "meta", "timestamp.json", self._mirrors - ) - + timestamp_url = _build_full_url(self._metadata_url, "timestamp.json") verified_timestamp = None temp_obj = None try: temp_obj = download.download_file( - file_mirrors[0], + timestamp_url, settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, self._fetcher, strict_required_length=False, @@ -319,7 +305,7 @@ def _load_timestamp(self) -> None: except Exception as e: # TODO: do we reraise a NoWorkingMirrorError or just # let exceptions propagate? 
- raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e + raise exceptions.NoWorkingMirrorError({timestamp_url: e}) from e finally: if temp_obj: @@ -348,15 +334,12 @@ def _load_snapshot(self) -> None: # version = None # TODO: Check if exists locally - file_mirrors = mirrors.get_list_of_mirrors( - "meta", "snapshot.json", self._mirrors - ) - + snapshot_url = _build_full_url(self._metadata_url, "snapshot.json") verified_snapshot = False temp_obj = None try: temp_obj = download.download_file( - file_mirrors[0], + snapshot_url, length, self._fetcher, strict_required_length=False, @@ -368,7 +351,7 @@ def _load_snapshot(self) -> None: except Exception as e: # TODO: do we reraise a NoWorkingMirrorError or just # let exceptions propagate? - raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e + raise exceptions.NoWorkingMirrorError({snapshot_url: e}) from e finally: if temp_obj: @@ -398,15 +381,14 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # TODO: Check if exists locally - file_mirrors = mirrors.get_list_of_mirrors( - "meta", f"{targets_role}.json", self._mirrors + targets_url = _build_full_url( + self._metadata_url, f"{targets_role}.json" ) - verified_targets = False temp_obj = None try: temp_obj = download.download_file( - file_mirrors[0], + targets_url, length, self._fetcher, strict_required_length=False, @@ -420,7 +402,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: except Exception as e: # TODO: do we reraise a NoWorkingMirrorError or just # let exceptions propagate? 
- raise exceptions.NoWorkingMirrorError({file_mirrors[0]: e}) from e + raise exceptions.NoWorkingMirrorError({targets_url: e}) from e finally: if temp_obj: @@ -849,11 +831,17 @@ def _get_target_hash(target_filepath, hash_function="sha256"): return target_filepath_hash -def neither_403_nor_404(mirror_error): +def _build_full_url(base_url, filepath): """ - TODO + Build a full “absolute" URL by combining a base URL with + a relative file path. """ - if isinstance(mirror_error, exceptions.FetcherHTTPError): - if mirror_error.status_code in {403, 404}: - return False - return True + # Are these steps enough? Or too much? Is this the right place? + filepath = parse.quote(filepath) + # Assuming that base_url ends with a '/' character, otherwise parse.urljoin + # omits (correcly) the last part of the base URL path + full_url = parse.urljoin(base_url, filepath) + # Avoid windows path separators. Should we keep this check? Or correct + # paths should be required from the user? + # full_url.replace("\\", "/") + return full_url From 45259cfdb22dd55ed85fb1a63e815a9d8ac4ad4c Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 10:33:03 +0300 Subject: [PATCH 32/86] new updater: Seek to beginning of file after length check Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 9fbae6edc1..93aa22fca5 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -163,7 +163,6 @@ def download_target( full_url, targetinfo["fileinfo"]["length"], self._fetcher ) _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - temp_obj.seek(0) _check_hashes_obj(temp_obj, targetinfo["fileinfo"]["hashes"]) except Exception as e: @@ -766,6 +765,7 @@ def _check_file_length(file_object, trusted_file_length): """ file_object.seek(0, 2) observed_length = file_object.tell() + 
file_object.seek(0) # Return and log a message if the length 'file_object' is equal to # 'trusted_file_length', otherwise raise an exception. A hard check From 5732163229ffa42dee6239f339496749ae84f04f Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 11:08:08 +0300 Subject: [PATCH 33/86] new updater: remove unnecessary file objects Removing mirrors means we no longer need to do file object handling manually. Note that this means we're now exposing the Updater caller to all kinds of new exceptions (as NoWorkingMirrorError is no longer an excuse we can use). Signed-off-by: Jussi Kukkonen --- tuf/client_rework/download.py | 12 +++ tuf/client_rework/updater_rework.py | 135 +++++++--------------------- 2 files changed, 46 insertions(+), 101 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index ec7b3e1ec0..fc53b70e12 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -138,9 +138,21 @@ def download_file(url, required_length, fetcher, strict_required_length=True): raise else: + temp_file.seek(0) return temp_file +def download_bytes(url, required_length, fetcher, strict_required_length=True): + """Download bytes from given url + + Returns the downloaded bytes, otherwise like download_file() + """ + with download_file( + url, required_length, fetcher, strict_required_length + ) as dl_file: + return dl_file.read() + + def _check_downloaded_length( total_downloaded, required_length, diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 93aa22fca5..d964525806 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -157,24 +157,16 @@ def download_target( full_url = _build_full_url(target_url, targetinfo["filepath"]) - temp_obj = None - try: - temp_obj = download.download_file( - full_url, targetinfo["fileinfo"]["length"], self._fetcher + with download.download_file( + full_url, targetinfo["fileinfo"]["length"], 
self._fetcher + ) as target_file: + _check_file_length(target_file, targetinfo["fileinfo"]["length"]) + _check_hashes_obj(target_file, targetinfo["fileinfo"]["hashes"]) + + filepath = os.path.join( + destination_directory, targetinfo["filepath"] ) - _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - _check_hashes_obj(temp_obj, targetinfo["fileinfo"]["hashes"]) - - except Exception as e: - if temp_obj: - temp_obj.close() - # TODO: do we reraise a NoWorkingMirrorError or just - # let exceptions propagate? - raise exceptions.NoWorkingMirrorError({full_url: e}) from e - - filepath = os.path.join(destination_directory, targetinfo["filepath"]) - sslib_util.persist_temp_file(temp_obj, filepath) - temp_obj.close() + sslib_util.persist_temp_file(target_file, filepath) def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -219,19 +211,17 @@ def _load_root(self) -> None: root_url = _build_full_url( self._metadata_url, f"{next_version}.root.json" ) - temp_obj = None # For each version of root iterate over the list of mirrors # until an intermediate root is successfully downloaded and # verified. - temp_obj = download.download_file( + data = download.download_bytes( root_url, settings.DEFAULT_ROOT_REQUIRED_LENGTH, self._fetcher, strict_required_length=False, ) - temp_obj.seek(0) - intermediate_root = self._verify_root(temp_obj.read()) + intermediate_root = self._verify_root(data) # TODO: persist should happen here for each intermediate # root according to the spec @@ -242,11 +232,6 @@ def _load_root(self) -> None: # error is received break - finally: - if temp_obj: - temp_obj.close() - temp_obj = None - if intermediate_root: # Check for a freeze attack. 
The latest known time MUST be lower # than the expiration timestamp in the trusted root metadata file @@ -288,31 +273,13 @@ def _load_timestamp(self) -> None: """ # TODO Check if timestamp exists locally timestamp_url = _build_full_url(self._metadata_url, "timestamp.json") - verified_timestamp = None - temp_obj = None - try: - temp_obj = download.download_file( - timestamp_url, - settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - verified_timestamp = self._verify_timestamp(temp_obj.read()) - - except Exception as e: - # TODO: do we reraise a NoWorkingMirrorError or just - # let exceptions propagate? - raise exceptions.NoWorkingMirrorError({timestamp_url: e}) from e - - finally: - if temp_obj: - temp_obj.close() - - self._metadata["timestamp"] = verified_timestamp - # Persist root metadata. The client MUST write the file to - # non-volatile storage as FILENAME.EXT (e.g. root.json). + data = download.download_bytes( + timestamp_url, + settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, + self._fetcher, + strict_required_length=False, + ) + self._metadata["timestamp"] = self._verify_timestamp(data) self._metadata["timestamp"].persist( self._get_full_meta_name("timestamp.json") ) @@ -334,31 +301,14 @@ def _load_snapshot(self) -> None: # TODO: Check if exists locally snapshot_url = _build_full_url(self._metadata_url, "snapshot.json") - verified_snapshot = False - temp_obj = None - try: - temp_obj = download.download_file( - snapshot_url, - length, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - verified_snapshot = self._verify_snapshot(temp_obj.read()) - - except Exception as e: - # TODO: do we reraise a NoWorkingMirrorError or just - # let exceptions propagate? 
- raise exceptions.NoWorkingMirrorError({snapshot_url: e}) from e - - finally: - if temp_obj: - temp_obj.close() + data = download.download_bytes( + snapshot_url, + length, + self._fetcher, + strict_required_length=False, + ) - self._metadata["snapshot"] = verified_snapshot - # Persist root metadata. The client MUST write the file to - # non-volatile storage as FILENAME.EXT (e.g. root.json). + self._metadata["snapshot"] = self._verify_snapshot(data) self._metadata["snapshot"].persist( self._get_full_meta_name("snapshot.json") ) @@ -383,33 +333,16 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: targets_url = _build_full_url( self._metadata_url, f"{targets_role}.json" ) - verified_targets = False - temp_obj = None - try: - temp_obj = download.download_file( - targets_url, - length, - self._fetcher, - strict_required_length=False, - ) - - temp_obj.seek(0) - verified_targets = self._verify_targets( - temp_obj.read(), targets_role, parent_role - ) - - except Exception as e: - # TODO: do we reraise a NoWorkingMirrorError or just - # let exceptions propagate? - raise exceptions.NoWorkingMirrorError({targets_url: e}) from e - - finally: - if temp_obj: - temp_obj.close() + data = download.download_bytes( + targets_url, + length, + self._fetcher, + strict_required_length=False, + ) - self._metadata[targets_role] = verified_targets - # Persist root metadata. The client MUST write the file to - # non-volatile storage as FILENAME.EXT (e.g. root.json). + self._metadata[targets_role] = self._verify_targets( + data, targets_role, parent_role + ) self._metadata[targets_role].persist( self._get_full_meta_name(targets_role, extension=".json") ) From 9fae500f8716f5fba4d15796db86784243604902 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 11:18:08 +0300 Subject: [PATCH 34/86] new updater: Rename _get_target_hash() The function actually hashes the target filepath. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index d964525806..c74b4afee1 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -640,7 +640,7 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") if child_role_path_hash_prefixes is not None: - target_filepath_hash = _get_target_hash(target_filepath) + target_filepath_hash = _get_filepath_hash(target_filepath) for child_role_path_hash_prefix in child_role_path_hash_prefixes: if not target_filepath_hash.startswith(child_role_path_hash_prefix): continue @@ -749,7 +749,7 @@ def _check_hashes(file_content, trusted_hashes): ) -def _get_target_hash(target_filepath, hash_function="sha256"): +def _get_filepath_hash(target_filepath, hash_function="sha256"): """ TODO """ From 888f022dbf08f09fb3b2dde0d964708e79ad049b Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 11:20:21 +0300 Subject: [PATCH 35/86] new updater: Remove misleading comment Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index c74b4afee1..2fbdf4f21d 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -700,10 +700,6 @@ def _check_file_length(file_object, trusted_file_length): observed_length = file_object.tell() file_object.seek(0) - # Return and log a message if the length 'file_object' is equal to - # 'trusted_file_length', otherwise raise an exception. A hard check - # ensures that a downloaded file strictly matches a known, or trusted, - # file length. 
if observed_length != trusted_file_length: raise exceptions.DownloadLengthMismatchError( trusted_file_length, observed_length From 3a02583398a59a1d5104eb42405d8614e21eee7d Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 11:31:23 +0300 Subject: [PATCH 36/86] Rename url prefixes so they are consistent Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 37 +++++++++++++++-------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 2fbdf4f21d..f02f4f0ed3 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -42,20 +42,14 @@ class Updater: def __init__( self, repository_name: str, - metadata_url: str, - default_target_url: Optional[str] = None, + metadata_base_url: str, + target_base_url: Optional[str] = None, fetcher: Optional[FetcherInterface] = None, ): self._repository_name = repository_name - self._metadata_url = metadata_url - # Should we accept metadata url as a default for targets or - # targets_url should be provided either in this constructor - # or as a download_target parameter? - if default_target_url is None: - self._default_target_url = metadata_url - else: - self._default_target_url = default_target_url + self._metadata_base_url = metadata_base_url + self._target_base_url = target_base_url self._consistent_snapshot = False self._metadata = {} @@ -146,16 +140,21 @@ def download_target( self, targetinfo: Dict, destination_directory: str, - target_url: Optional[str] = None, + target_base_url: Optional[str] = None, ): """ This method performs the actual download of the specified target. The file is saved to the 'destination_directory' argument. 
""" - if target_url is None: - target_url = self._default_target_url + if target_base_url is None and self._target_base_url is None: + raise ValueError( + "target_base_url must be set in either download_target() or " + "constructor" + ) + elif target_base_url is None: + target_base_url = self._target_base_url - full_url = _build_full_url(target_url, targetinfo["filepath"]) + full_url = _build_full_url(target_base_url, targetinfo["filepath"]) with download.download_file( full_url, targetinfo["fileinfo"]["length"], self._fetcher @@ -209,7 +208,7 @@ def _load_root(self) -> None: for next_version in range(lower_bound, upper_bound): try: root_url = _build_full_url( - self._metadata_url, f"{next_version}.root.json" + self._metadata_base_url, f"{next_version}.root.json" ) # For each version of root iterate over the list of mirrors # until an intermediate root is successfully downloaded and @@ -272,7 +271,9 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - timestamp_url = _build_full_url(self._metadata_url, "timestamp.json") + timestamp_url = _build_full_url( + self._metadata_base_url, "timestamp.json" + ) data = download.download_bytes( timestamp_url, settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, @@ -300,7 +301,7 @@ def _load_snapshot(self) -> None: # version = None # TODO: Check if exists locally - snapshot_url = _build_full_url(self._metadata_url, "snapshot.json") + snapshot_url = _build_full_url(self._metadata_base_url, "snapshot.json") data = download.download_bytes( snapshot_url, length, @@ -331,7 +332,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # TODO: Check if exists locally targets_url = _build_full_url( - self._metadata_url, f"{targets_role}.json" + self._metadata_base_url, f"{targets_role}.json" ) data = download.download_bytes( targets_url, From ab210b410b8c2b716ce5b0505c196f2223d49b47 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 11:55:08 +0300 Subject: [PATCH 37/86] new 
updater: Clean up url handling * Make sure all base urls (prefixes) end in a slash * Add documentation to get_one_valid_targetinfo(): That is the one place where the API accepts ill-defined "paths" from the caller * Remove checks from download url handling: we control both the base url and the relative path so there should be no surprises here. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 56 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index f02f4f0ed3..8de5cff708 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -46,10 +46,12 @@ def __init__( target_base_url: Optional[str] = None, fetcher: Optional[FetcherInterface] = None, ): - self._repository_name = repository_name - self._metadata_base_url = metadata_base_url - self._target_base_url = target_base_url + self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) + if target_base_url is None: + self._target_base_url = None + else: + self._target_base_url = _ensure_trailing_slash(target_base_url) self._consistent_snapshot = False self._metadata = {} @@ -77,13 +79,18 @@ def refresh(self) -> None: self._load_snapshot() self._load_targets("targets", "root") - def get_one_valid_targetinfo(self, filename: str) -> Dict: + def get_one_valid_targetinfo(self, target_path: str) -> Dict: """ - Returns the target information for a specific file identified by its - file path. This target method also downloads the metadata of updated - targets. + Returns the target information for a target identified by target_path. + + As a side-effect this method downloads all the metadata it needs to + return the target information. 
+ + Args: + target_path: A path-relative-URL string + (https://url.spec.whatwg.org/#path-relative-url-string) """ - return self._preorder_depth_first_walk(filename) + return self._preorder_depth_first_walk(target_path) @staticmethod def updated_targets(targets: Dict, destination_directory: str) -> Dict: @@ -151,10 +158,12 @@ def download_target( "target_base_url must be set in either download_target() or " "constructor" ) - elif target_base_url is None: + if target_base_url is None: target_base_url = self._target_base_url + else: + target_base_url = _ensure_trailing_slash(target_base_url) - full_url = _build_full_url(target_base_url, targetinfo["filepath"]) + full_url = parse.urljoin(target_base_url, targetinfo["filepath"]) with download.download_file( full_url, targetinfo["fileinfo"]["length"], self._fetcher @@ -207,7 +216,7 @@ def _load_root(self) -> None: for next_version in range(lower_bound, upper_bound): try: - root_url = _build_full_url( + root_url = parse.urljoin( self._metadata_base_url, f"{next_version}.root.json" ) # For each version of root iterate over the list of mirrors @@ -271,9 +280,7 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - timestamp_url = _build_full_url( - self._metadata_base_url, "timestamp.json" - ) + timestamp_url = parse.urljoin(self._metadata_base_url, "timestamp.json") data = download.download_bytes( timestamp_url, settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, @@ -301,7 +308,7 @@ def _load_snapshot(self) -> None: # version = None # TODO: Check if exists locally - snapshot_url = _build_full_url(self._metadata_base_url, "snapshot.json") + snapshot_url = parse.urljoin(self._metadata_base_url, "snapshot.json") data = download.download_bytes( snapshot_url, length, @@ -331,7 +338,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # TODO: Check if exists locally - targets_url = _build_full_url( + targets_url = parse.urljoin( self._metadata_base_url, f"{targets_role}.json" ) 
data = download.download_bytes( @@ -761,17 +768,6 @@ def _get_filepath_hash(target_filepath, hash_function="sha256"): return target_filepath_hash -def _build_full_url(base_url, filepath): - """ - Build a full “absolute" URL by combining a base URL with - a relative file path. - """ - # Are these steps enough? Or too much? Is this the right place? - filepath = parse.quote(filepath) - # Assuming that base_url ends with a '/' character, otherwise parse.urljoin - # omits (correcly) the last part of the base URL path - full_url = parse.urljoin(base_url, filepath) - # Avoid windows path separators. Should we keep this check? Or correct - # paths should be required from the user? - # full_url.replace("\\", "/") - return full_url +def _ensure_trailing_slash(url: str): + """Return url guaranteed to end in a slash""" + return url if url.endswith("/") else f"{url}/" From 9605e19db202f3166abeeab5f6a13889282697d6 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 7 May 2021 12:38:27 +0300 Subject: [PATCH 38/86] new updater: Improve docstrings Signed-off-by: Jussi Kukkonen --- tuf/client_rework/updater_rework.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 8de5cff708..e117e4fc2c 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -46,6 +46,16 @@ def __init__( target_base_url: Optional[str] = None, fetcher: Optional[FetcherInterface] = None, ): + """ + Args: + repository_name: directory name (within a local directory + defined by 'tuf.settings.repositories_directory') + metadata_base_url: Base URL for all remote metadata downloads + target_base_url: Optional; Default base URL for all remote target + downloads. Can be individually set in download_target() + fetcher: Optional; FetcherInterface implementation used to download + both metadata and targets. 
Default is RequestsFetcher + """ self._repository_name = repository_name self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) if target_base_url is None: @@ -87,8 +97,10 @@ def get_one_valid_targetinfo(self, target_path: str) -> Dict: return the target information. Args: - target_path: A path-relative-URL string - (https://url.spec.whatwg.org/#path-relative-url-string) + target_path: A target identifier that is a path-relative-URL string + (https://url.spec.whatwg.org/#path-relative-url-string). + Typically this is also the unix file path of the eventually + downloaded file. """ return self._preorder_depth_first_walk(target_path) @@ -150,8 +162,15 @@ def download_target( target_base_url: Optional[str] = None, ): """ - This method performs the actual download of the specified target. - The file is saved to the 'destination_directory' argument. + Download target specified by 'targetinfo' into 'destination_directory'. + + Args: + targetinfo: data received from get_one_valid_targetinfo() + destination_directory: existing local directory to download into. + Note that new directories may be created inside + destination_directory as required. + target_base_url: Optional; Base URL used to form the final target + download URL. Default is the value provided in Updater() """ if target_base_url is None and self._target_base_url is None: raise ValueError( From ec4c5ce00d8c77e68fe4049d9e8b50c634ea06e5 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 12 May 2021 15:42:53 +0300 Subject: [PATCH 39/86] tests: Don't use os.path.join() for URLS The test has issues like this already but let's not add more... 
Signed-off-by: Jussi Kukkonen --- tests/test_updater_rework.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py index 162fa5b1f9..b564fbf57e 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_rework.py @@ -123,8 +123,8 @@ def setUp(self): # directory copied from the original repository files. tuf.settings.repositories_directory = self.client_directory - metadata_url = os.path.join(url_prefix, 'metadata/') - targets_url = os.path.join(url_prefix, 'targets/') + metadata_url = f"{url_prefix}/metadata/" + targets_url = f"{url_prefix}/targets/" # Creating a repository instance. The test cases will use this client # updater to refresh metadata, fetch target files, etc. self.repository_updater = updater.Updater(self.repository_name, From fa6a798931f8d282bb4b7d33b43a9ea69c9a67bb Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 12 May 2021 17:31:21 +0300 Subject: [PATCH 40/86] Fix issues after merging develop Fix failing tests after merging develop into experimental-client. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_wrapper.py | 22 +++++++++++----------- tuf/client_rework/updater_rework.py | 13 +++++-------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index b1dd2d1d58..5031003b2c 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -95,8 +95,8 @@ def keys(self, role): TODO """ keys = [] - for keyid in self._meta.signed.roles[role]["keyids"]: - key_metadata = self._meta.signed.keys[keyid] + for keyid in self._meta.signed.roles[role].keyids: + key_metadata = self._meta.signed.keys[keyid].to_dict() key, dummy = format_metadata_to_key(key_metadata) keys.append(key) @@ -106,7 +106,7 @@ def threshold(self, role): """ TODO """ - return self._meta.signed.roles[role]["threshold"] + return self._meta.signed.roles[role].threshold class TimestampWrapper(MetadataWrapper): @@ -158,11 +158,11 @@ def keys(self, role): TODO """ keys = [] - for delegation in self._meta.signed.delegations["roles"]: - if delegation["name"] == role: - for keyid in delegation["keyids"]: - key_metadata = self._meta.signed.delegations["keys"][keyid] - key, dummy = format_metadata_to_key(key_metadata) + for delegation in self._meta.signed.delegations.roles: + if delegation.name == role: + for keyid in delegation.keyids: + key_metadata = self._meta.signed.delegations.keys[keyid] + key, dummy = format_metadata_to_key(key_metadata.to_dict()) keys.append(key) return keys @@ -170,8 +170,8 @@ def threshold(self, role): """ TODO """ - for delegation in self._meta.signed.delegations["roles"]: - if delegation["name"] == role: - return delegation["threshold"] + for delegation in self._meta.signed.delegations.roles: + if delegation.name == role: + return delegation.threshold return None diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index e117e4fc2c..9f52bce41f 100644 --- 
a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -558,7 +558,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # And also decrement number of visited roles. number_of_delegations -= 1 delegations = role_metadata.delegations - child_roles = delegations.get("roles", []) + child_roles = delegations.roles if target is None: @@ -570,10 +570,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: child_role, target_filepath ) - if ( - child_role["terminating"] - and child_role_name is not None - ): + if child_role.terminating and child_role_name is not None: msg = ( f"Adding child role {child_role_name}.\n", "Not backtracking to other roles.", @@ -662,9 +659,9 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: Otherwise, we return None. """ - child_role_name = child_role["name"] - child_role_paths = child_role.get("paths") - child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") + child_role_name = child_role.name + child_role_paths = child_role.paths + child_role_path_hash_prefixes = child_role.path_hash_prefixes if child_role_path_hash_prefixes is not None: target_filepath_hash = _get_filepath_hash(target_filepath) From cd60e8100a31502549dfc40c91d08dff262b7000 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 13 May 2021 12:48:18 +0300 Subject: [PATCH 41/86] Prevent access to missing metadata fields Delegations are optional. Add checks to avoid accessing 'Delegations' object's members if its value is None. 
Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_wrapper.py | 26 ++++++++++++++++---------- tuf/client_rework/updater_rework.py | 5 +++-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py index 5031003b2c..fbc3335c3e 100644 --- a/tuf/client_rework/metadata_wrapper.py +++ b/tuf/client_rework/metadata_wrapper.py @@ -158,20 +158,26 @@ def keys(self, role): TODO """ keys = [] - for delegation in self._meta.signed.delegations.roles: - if delegation.name == role: - for keyid in delegation.keyids: - key_metadata = self._meta.signed.delegations.keys[keyid] - key, dummy = format_metadata_to_key(key_metadata.to_dict()) - keys.append(key) - return keys + if self._meta.signed.delegations is not None: + for delegation in self._meta.signed.delegations.roles: + if delegation.name == role: + for keyid in delegation.keyids: + key_metadata = self._meta.signed.delegations.keys[keyid] + key, dummy = format_metadata_to_key( + key_metadata.to_dict() + ) + keys.append(key) + return keys + + return keys def threshold(self, role): """ TODO """ - for delegation in self._meta.signed.delegations.roles: - if delegation.name == role: - return delegation.threshold + if self._meta.signed.delegations is not None: + for delegation in self._meta.signed.delegations.roles: + if delegation.name == role: + return delegation.threshold return None diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 9f52bce41f..fbf3778ee7 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -557,8 +557,9 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # And also decrement number of visited roles. 
number_of_delegations -= 1 - delegations = role_metadata.delegations - child_roles = delegations.roles + child_roles = [] + if role_metadata.delegations is not None: + child_roles = role_metadata.delegations.roles if target is None: From 3b161bfe20ba20703dd850e8538f0d43fd3d578a Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 20 Apr 2021 09:46:41 +0300 Subject: [PATCH 42/86] experimental client: Add MetadataBundle MetadataBundle keeps track of current valid set of metadata for the client, and handles almost every step of the "Detailed client workflow" in the TUF specification (the remaining steps are download related). The bundle takes care of persisting valid metadata on disk, loading local metadata from disk and deleting invalid local metadata. It also verifies any new metadata (downloaded from remote repository) it is given. This is very much a work-in-progress. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 349 +++++++++++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 tuf/client_rework/metadata_bundle.py diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py new file mode 100644 index 0000000000..f54335b5ef --- /dev/null +++ b/tuf/client_rework/metadata_bundle.py @@ -0,0 +1,349 @@ +# Copyright the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client bundle-of-metadata + +MetadataBundle keeps track of current valid set of metadata for the client, +and handles almost every step of the "Detailed client workflow" in the TUF +specification (the remaining steps are download related). The bundle takes +care of persisting valid metadata on disk, loading local metadata from disk +and deleting invalid local metadata. + +New metadata (downloaded from a remote repository) can be loaded using +'update_metadata()'. The type of accepted metadata depends on bundle state +(states are "root"/"timestamp"/"snapshot"/"targets"/). 
Bundle states advances +to next state on every successful metadata update, except for "root" where state +only advances when 'root_update_finished()' is called. Exceptions will be thrown +if metadata fails to load in any way. + +Example (with hypothetical download function): + +>>> # Load local root +>>> bundle = MetadataBundle("path/to/metadata") +>>> +>>> # state: "root", load more root versions from remote +>>> with download("root", bundle.root.signed.version + 1) as f: +>>> bundle.load_metadata(f.read()) +>>> with download("root", bundle.root.signed.version + 1) as f: +>>> bundle.load_metadata(f.read()) +>>> +>>> # Finally, no more root from remote +>>> bundle.root_update_finished() +>>> +>>> # state: "timestamp", load timestamp +>>> with download("timestamp") as f: +>>> bundle.load_metadata(f.read()) +>>> +>>> # state: "snapshot", load snapshot (consistent snapshot not shown) +>>> with download("snapshot") as f: +>>> bundle.load_metadata(f.read()) +>>> +>>> # state: "targets", load targets +>>> version = bundle.snapshot.signed.meta["targets.json"]["version"] +>>> with download("snapshot", version + 1) as f: +>>> bundle.load_metadata(f.read()) +>>> +>>> # Top level metadata is now fully loaded and verified + + +TODO: + * Delegated targets not implement yet + * exceptions are all over the place and not thought out at all + * a bit of repetition + * No tests! + * Naming maybe not final? + * some metadata interactions might work better in Metadata itself + * Progress through Specification update process should be documented + (not sure yet how) +""" + +from collections import abc +from datetime import datetime +import logging +import os +from typing import Dict + +from securesystemslib import keys as sslib_hash +from securesystemslib import keys as sslib_keys + +from tuf import exceptions +from tuf.api.metadata import Metadata + +logger = logging.getLogger(__name__) + +# This is a placeholder until ... 
+# TODO issue 1306: implement this in Metadata API +def verify_with_threshold(root: Metadata, role: str, unverified: Metadata): + unique_keys = set() + for keyid in root.signed.roles[role]["keyids"]: + key_metadata = root.signed.keys[keyid] + key, _ = sslib_keys.format_metadata_to_key(key_metadata) + + try: + if unverified.verify(key): + unique_keys.add(key["keyval"]["public"]) + except: + pass + + return len(unique_keys) >= root.signed.roles[role]["threshold"] + + +# TODO issue 1336: implement in metadata api +from tuf.api.serialization.json import JSONDeserializer + + +def from_string(data: str) -> Metadata: + return JSONDeserializer().deserialize(data) + + +class MetadataBundle(abc.Mapping): + def __init__(self, path: str): + """Initialize by loading existing metadata from disk + + This includes root, timestamp, snapshot and _top-level_ targets . + """ + self._path = path + self._bundle = {} # type: Dict[str: Metadata] + self._state = "root" + self.reference_time = None + + if not os.path.exists(path): + # TODO try to create dir instead? 
+ raise exceptions.RepositoryError("Repository does not exist") + + # Load and validate the local root metadata + # Valid root metadata is required (but invalid files are not removed) + try: + with open(os.path.join(self._path, "root.json"), "rb") as f: + self._load_intermediate_root(f.read()) + logger.debug("Loaded local root.json") + except: + raise exceptions.RepositoryError("Failed to load local root metadata") + + def update_metadata(self, metadata_str: str): + logger.debug("Updating %s", self._state) + if self._state == "root": + self._load_intermediate_root(metadata_str) + self.root.to_file(os.path.join(self._path, "root.json")) + elif self._state == "timestamp": + self._load_timestamp(metadata_str) + self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) + self._state = "snapshot" + elif self._state == "snapshot": + self._load_snapshot(metadata_str) + self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) + self._state = "targets" + elif self._state == "targets": + self._load_targets(metadata_str) + self.targets.to_file(os.path.join(self._path, "targets.json")) + self._state = "" + else: + raise NotImplementedError + + def root_update_finished(self): + if self._state != "root": + # bundle does not support this order of ops + raise exceptions.RepositoryError + + self._make_root_permanent(self) + self._state = "timestamp" + + # Implement Mapping + def __getitem__(self, key: str): + return self._bundle[key] + + def __len__(self): + return len(self._bundle) + + def __iter__(self): + return iter(self._bundle) + + # Helper properties for top level metadata + @property + def root(self): + return self._bundle.get("root") + + @property + def timestamp(self): + return self._bundle.get("timestamp") + + @property + def snapshot(self): + return self._bundle.get("snapshot") + + @property + def targets(self): + return self._bundle.get("targets") + + def _load_intermediate_root(self, data: str): + """Verify the new root using current root (if any) and 
use it as current root + + Raises if root fails verification + """ + new_root = from_string(data) + if new_root.signed._type != "root": + raise exceptions.RepositoryError + + if self.root is not None: + if not verify_with_threshold(self.root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by root", new_root.signed + ) + + if new_root.signed.version != self.root.signed.version + 1: + # TODO not a "Replayed Metadata attack": the version is just not what we expected + raise exceptions.ReplayedMetadataError( + "root", new_root.signed.version, self.root.signed.version + ) + + if not verify_with_threshold(new_root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by itself", new_root.signed + ) + + self._bundle["root"] = new_root + + def _make_root_permanent(self): + # Store our reference "now", verify root expiry + self.reference_time = datetime.utcnow() + if self.root.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError + + logger.debug("Verified final root.json") + + # Load remaning local metadata: this ensures invalid + # metadata gets wiped from disk + try: + with open(os.path.join(self._path, "timestamp.json"), "rb") as f: + self._load_timestamp(f.read()) + logger.debug("Loaded local timestamp.json") + except Exception as e: + # TODO only handle specific errors + logger.debug("Failed to load local timestamp.json") + # TODO delete local file + + try: + with open(os.path.join(self._path, "snapshot.json"), "rb") as f: + self._load_snapshot(f.read()) + logger.debug("Loaded local snapshot.json") + except Exception as e: + # TODO only handle specific errors + logger.debug("Failed to load local snapshot.json") + # TODO delete local file + + try: + with open(os.path.join(self._path, "targets.json"), "rb") as f: + self._load_targets(f.read()) + logger.debug("Loaded local targets.json") + except Exception as e: + # TODO only handle specific errors + logger.debug("Failed 
to load local targets.json") + # TODO delete local file + + def _load_timestamp(self, data: str): + """Verifies the new timestamp and uses it as current timestamp + + Raises if verification fails + """ + new_timestamp = from_string(data) + if new_timestamp.signed._type != "timestamp": + raise exceptions.RepositoryError + + if not verify_with_threshold(self.root, "timestamp", new_timestamp): + raise exceptions.UnsignedMetadataError( + "New timestamp is not signed by root", new_timestamp.signed + ) + + if self.timestamp is not None: + # Prevent rolling back timestamp version + if new_timestamp.signed.version < self.timestamp.signed.version: + raise exceptions.ReplayedMetadataError( + "timestamp", + new_timestamp.signed.version, + self.timestamp.signed.version, + ) + # Prevent rolling back snapshot version + if ( + new_timestamp.signed.meta["snapshot.json"]["version"] + < self.timestamp.signed.meta["snapshot.json"]["version"] + ): + # TODO not sure about the + raise exceptions.ReplayedMetadataError( + "snapshot", + new_timestamp.signed.meta["snapshot.json"]["version"], + self.timestamp.signed.meta["snapshot.json"]["version"], + ) + + if new_timestamp.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError + + self._bundle["timestamp"] = new_timestamp + + def _load_snapshot(self, data: str): + # Verify against the hashes in timestamp, if any + meta = self.timestamp.signed.meta["snapshot.json"] + hashes = meta.get("hashes") or {} + for algo, _hash in meta["hashes"].items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + if digest_object.hexdigest() != _hash: + raise exceptions.BadHashError() + new_snapshot = from_string(data) + if new_snapshot.signed._type != "snapshot": + raise exceptions.RepositoryError + + if not verify_with_threshold(self.root, "snapshot", new_snapshot): + raise exceptions.UnsignedMetadataError( + "New snapshot is not signed by root", new_snapshot.signed + ) + + if ( + new_snapshot.signed.version + 
!= self.timestamp.signed.meta["snapshot.json"]["version"] + ): + raise exceptions.BadVersionNumberError + + if self.snapshot: + for filename, fileinfo in self.snapshot.signed.meta.items(): + new_fileinfo = new_snapshot.signed.meta.get(filename) + + # Prevent removal of any metadata in meta + if new_fileinfo is None: + raise exceptions.ReplayedMetadataError + + # Prevent rollback of any metadata versions + if new_fileinfo["version"] < fileinfo["version"]: + raise exceptions.ReplayedMetadataError + + if new_snapshot.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError + + self._bundle["snapshot"] = new_snapshot + + def _load_targets(self, data: str): + # Verify against the hashes in snapshot, if any + meta = self.snapshot.signed.meta["targets.json"] + + hashes = meta.get("hashes") or {} + for algo, _hash in hashes.items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + if digest_object.hexdigest() != _hash: + raise exceptions.BadHashError() + + new_targets = from_string(data) + if new_targets.signed._type != "targets": + raise exceptions.RepositoryError + + if not verify_with_threshold(self.root, "targets", new_targets): + raise exceptions.UnsignedMetadataError( + "New targets is not signed by root", new_targets.signed + ) + + if new_targets.signed.version != meta["version"]: + raise exceptions.BadVersionNumberError + + if new_targets.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError + + self._bundle["targets"] = new_targets From 5e1fe0d4b501d1ca7acb864e6603b2c0367526af Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 20 Apr 2021 11:14:43 +0300 Subject: [PATCH 43/86] MetadataBundle: Update outdated docstring Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index f54335b5ef..ecf509168c 100644 --- 
a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -98,9 +98,7 @@ def from_string(data: str) -> Metadata: class MetadataBundle(abc.Mapping): def __init__(self, path: str): - """Initialize by loading existing metadata from disk - - This includes root, timestamp, snapshot and _top-level_ targets . + """Initialize by loading root metadata from disk """ self._path = path self._bundle = {} # type: Dict[str: Metadata] From 3f7c40524cefc737501c5e045cd6b57730120178 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 20 Apr 2021 21:10:52 +0300 Subject: [PATCH 44/86] MetadataBundle: Modify state handling A single state variable cannot really handle the case where we may want to load local metadata and may want to update from remote -- but are not required to do either. Instead, make sure two rules apply: * Metadata can only be loaded/updated if all metadata it depends on is loaded * Metadata becomes immutable when anything that depends on it is loaded So, loading a new timestamp is possible if root is loaded and snapshot is not loaded. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 193 +++++++++++++++++---------- 1 file changed, 119 insertions(+), 74 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index ecf509168c..b4e9211d4c 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -10,38 +10,43 @@ and deleting invalid local metadata. New metadata (downloaded from a remote repository) can be loaded using -'update_metadata()'. The type of accepted metadata depends on bundle state -(states are "root"/"timestamp"/"snapshot"/"targets"/). Bundle states advances -to next state on every successful metadata update, except for "root" where state -only advances when 'root_update_finished()' is called. Exceptions will be thrown -if metadata fails to load in any way. +'update_metadata()'. 
The type of accepted metadata depends on bundle state: + * Metadata is loadable only if all metadata it depends on is loaded + * Metadata is immutable if any metadata depending on it has been loaded + +Exceptions are raised if metadata fails to load in any way (except in the +case of local loads -- see locad_local_metadata()). Example (with hypothetical download function): >>> # Load local root >>> bundle = MetadataBundle("path/to/metadata") >>> ->>> # state: "root", load more root versions from remote +>>> # load more root versions from remote >>> with download("root", bundle.root.signed.version + 1) as f: ->>> bundle.load_metadata(f.read()) +>>> bundle.update_metadata(f.read()) >>> with download("root", bundle.root.signed.version + 1) as f: ->>> bundle.load_metadata(f.read()) +>>> bundle.update_metadata(f.read()) >>> ->>> # Finally, no more root from remote +>>> # Finally, no more roots from remote >>> bundle.root_update_finished() >>> ->>> # state: "timestamp", load timestamp +>>> # load local timestamp, then update it +>>> bundle.load_local_metadata("timestamp") >>> with download("timestamp") as f: ->>> bundle.load_metadata(f.read()) +>>> bundle.update_metadata(f.read()) >>> ->>> # state: "snapshot", load snapshot (consistent snapshot not shown) ->>> with download("snapshot") as f: ->>> bundle.load_metadata(f.read()) +>>> # load local snapshot, then update it if needed +>>> if not bundle.load_local_metadata("snapshot"): +>>> # load snapshot (consistent snapshot not shown) +>>> with download("snapshot") as f: +>>> bundle.update_metadata(f.read()) >>> ->>> # state: "targets", load targets ->>> version = bundle.snapshot.signed.meta["targets.json"]["version"] ->>> with download("snapshot", version + 1) as f: ->>> bundle.load_metadata(f.read()) +>>> # load local targets, then update it if needed +>>> if not bundle.load_local_metadata("targets"): +>>> version = bundle.snapshot.signed.meta["targets.json"]["version"] +>>> with download("snapshot", version + 1) as f: 
+>>> bundle.update_metadata(f.read()) >>> >>> # Top level metadata is now fully loaded and verified @@ -102,7 +107,6 @@ def __init__(self, path: str): """ self._path = path self._bundle = {} # type: Dict[str: Metadata] - self._state = "root" self.reference_time = None if not os.path.exists(path): @@ -110,41 +114,103 @@ def __init__(self, path: str): raise exceptions.RepositoryError("Repository does not exist") # Load and validate the local root metadata - # Valid root metadata is required (but invalid files are not removed) - try: - with open(os.path.join(self._path, "root.json"), "rb") as f: - self._load_intermediate_root(f.read()) - logger.debug("Loaded local root.json") - except: + # Valid root metadata is required + if not self.load_local_metadata("root"): raise exceptions.RepositoryError("Failed to load local root metadata") - def update_metadata(self, metadata_str: str): - logger.debug("Updating %s", self._state) - if self._state == "root": + def load_local_metadata(self, role_name: str, delegator_name: str = None) -> bool: + """Loads metadata from local storage and inserts into bundle + + If bundle already contains 'role_name', nothing is loaded. + Failure to read the file, failure to parse it and failure to + load it as valid metadata will not raise exceptions: the function + will just fail. 
+ + Returns True if 'role_name' is now in the bundle + """ + if self.get(role_name) is not None: + logger.debug("Already loaded %s.json", role_name) + return True + + logger.debug("Loading local %s.json", role_name) + + self._raise_on_unsupported_state(role_name) + + try: + with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: + data = f.read() + + if role_name == "root": + self._load_intermediate_root(data) + elif role_name == "timestamp": + self._load_timestamp(data) + elif role_name == "snapshot": + self._load_snapshot(data) + elif role_name == "targets": + self._load_targets(data) + else: + self._load_delegate(data, delegator_name) + + return True + except Exception as e: + # TODO only handle specific errors + logger.debug("Failed to load local %s.json", role_name) + # TODO delete local file (except probably should not delete root.json?) + return False + + def update_metadata(self, metadata_str: str, role_name: str, delegator_name: str = None): + logger.debug("Updating %s", role_name) + + self._raise_on_unsupported_state(role_name) + + if role_name == "root": self._load_intermediate_root(metadata_str) self.root.to_file(os.path.join(self._path, "root.json")) - elif self._state == "timestamp": + elif role_name == "timestamp": self._load_timestamp(metadata_str) self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) - self._state = "snapshot" - elif self._state == "snapshot": + elif role_name == "snapshot": self._load_snapshot(metadata_str) self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) - self._state = "targets" - elif self._state == "targets": + elif role_name == "targets": self._load_targets(metadata_str) self.targets.to_file(os.path.join(self._path, "targets.json")) - self._state = "" else: raise NotImplementedError def root_update_finished(self): - if self._state != "root": + if self.timestamp is not None: # bundle does not support this order of ops raise exceptions.RepositoryError - self._make_root_permanent(self) - 
self._state = "timestamp" + # Store our reference "now", verify root expiry + self.reference_time = datetime.utcnow() + if self.root.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError + + logger.debug("Verified final root.json") + + def _raise_on_unsupported_state(self, role_name: str): + """Raise if updating 'role_name' is not supported at this state""" + if role_name == "root": + if self.timestamp is not None: + raise exceptions.RepositoryError + elif role_name == "timestamp": + if self.reference_time is None: + # root_update_finished() not called + raise exceptions.RepositoryError + if self.snapshot is not None: + raise exceptions.RepositoryError + elif role_name == "snapshot": + if self.targets is not None: + raise exceptions.RepositoryError + elif role_name == "targets": + if len(self) > 4: + # delegates have been loaded already + raise exceptions.RepositoryError + else: + if self.targets is None: + raise exceptions.RepositoryError # Implement Mapping def __getitem__(self, key: str): @@ -201,48 +267,15 @@ def _load_intermediate_root(self, data: str): self._bundle["root"] = new_root - def _make_root_permanent(self): - # Store our reference "now", verify root expiry - self.reference_time = datetime.utcnow() - if self.root.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError - - logger.debug("Verified final root.json") - - # Load remaning local metadata: this ensures invalid - # metadata gets wiped from disk - try: - with open(os.path.join(self._path, "timestamp.json"), "rb") as f: - self._load_timestamp(f.read()) - logger.debug("Loaded local timestamp.json") - except Exception as e: - # TODO only handle specific errors - logger.debug("Failed to load local timestamp.json") - # TODO delete local file - - try: - with open(os.path.join(self._path, "snapshot.json"), "rb") as f: - self._load_snapshot(f.read()) - logger.debug("Loaded local snapshot.json") - except Exception as e: - # TODO only handle specific 
errors - logger.debug("Failed to load local snapshot.json") - # TODO delete local file - - try: - with open(os.path.join(self._path, "targets.json"), "rb") as f: - self._load_targets(f.read()) - logger.debug("Loaded local targets.json") - except Exception as e: - # TODO only handle specific errors - logger.debug("Failed to load local targets.json") - # TODO delete local file - def _load_timestamp(self, data: str): """Verifies the new timestamp and uses it as current timestamp Raises if verification fails """ + if self.root is None: + # bundle does not support this order of ops + raise exceptions.RepositoryError + new_timestamp = from_string(data) if new_timestamp.signed._type != "timestamp": raise exceptions.RepositoryError @@ -278,6 +311,10 @@ def _load_timestamp(self, data: str): self._bundle["timestamp"] = new_timestamp def _load_snapshot(self, data: str): + if self.root is None or self.timestamp is None: + # bundle does not support this order of ops + raise exceptions.RepositoryError + # Verify against the hashes in timestamp, if any meta = self.timestamp.signed.meta["snapshot.json"] hashes = meta.get("hashes") or {} @@ -319,6 +356,10 @@ def _load_snapshot(self, data: str): self._bundle["snapshot"] = new_snapshot def _load_targets(self, data: str): + if self.root is None or self.snapshot is None: + # bundle does not support this order of ops + raise exceptions.RepositoryError + # Verify against the hashes in snapshot, if any meta = self.snapshot.signed.meta["targets.json"] @@ -345,3 +386,7 @@ def _load_targets(self, data: str): raise exceptions.ExpiredMetadataError self._bundle["targets"] = new_targets + + + def _load_delegate(self, data: str, delegator_name: str = None): + pass \ No newline at end of file From 62284549eeab1d75ddb2a01d4a32e16a09ed3175 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 21 Apr 2021 15:41:12 +0300 Subject: [PATCH 45/86] MetadataBundle: implement delegates support Also make sure we always try loading local data for the 
metadata that gets security benefits from it. Also update the threshold verification (placeholder) to support targets as well as root. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 80 +++++++++++++++++++--------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index b4e9211d4c..f965ec9460 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -78,19 +78,34 @@ # This is a placeholder until ... # TODO issue 1306: implement this in Metadata API -def verify_with_threshold(root: Metadata, role: str, unverified: Metadata): +def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metadata): + if delegator.signed._type == 'root': + keys = delegator.signed.keys + role = delegator.signed.roles.get(role_name) + elif delegator.signed._type == 'targets': + keys = delegator.signed.delegations["keys"] + # role names are unique: first match is enough + roles = delegator.signed.delegations["roles"] + role = next((role for role in roles if role["name"] == role_name), None) + else: + raise ValueError('Call is valid only on delegator metadata') + + if role is None: + raise exceptions.UnknownRoleError + + # verify that delegate is signed by correct threshold of unique keys unique_keys = set() - for keyid in root.signed.roles[role]["keyids"]: - key_metadata = root.signed.keys[keyid] - key, _ = sslib_keys.format_metadata_to_key(key_metadata) + for keyid in role["keyids"]: + key_metadata = keys[keyid] + key, dummy = sslib_keys.format_metadata_to_key(key_metadata) try: if unverified.verify(key): unique_keys.add(key["keyval"]["public"]) - except: + except: # TODO specify the Exceptions pass - return len(unique_keys) >= root.signed.roles[role]["threshold"] + return len(unique_keys) >= role["threshold"] # TODO issue 1336: implement in metadata api @@ -149,7 +164,7 @@ def load_local_metadata(self, role_name: str, 
delegator_name: str = None) -> boo elif role_name == "targets": self._load_targets(data) else: - self._load_delegate(data, delegator_name) + self._load_delegated_targets(data, role_name, delegator_name) return True except Exception as e: @@ -164,19 +179,23 @@ def update_metadata(self, metadata_str: str, role_name: str, delegator_name: str self._raise_on_unsupported_state(role_name) if role_name == "root": + self.load_local_metadata("root") self._load_intermediate_root(metadata_str) self.root.to_file(os.path.join(self._path, "root.json")) elif role_name == "timestamp": + self.load_local_metadata("timestamp") self._load_timestamp(metadata_str) self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) elif role_name == "snapshot": + self.load_local_metadata("snapshot") self._load_snapshot(metadata_str) self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) elif role_name == "targets": self._load_targets(metadata_str) self.targets.to_file(os.path.join(self._path, "targets.json")) else: - raise NotImplementedError + self._load_delegated_targets(metadata_str, role_name, delegator_name) + self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) def root_update_finished(self): if self.timestamp is not None: @@ -202,13 +221,17 @@ def _raise_on_unsupported_state(self, role_name: str): if self.snapshot is not None: raise exceptions.RepositoryError elif role_name == "snapshot": + if self.timestamp is None: + raise exceptions.RepositoryError if self.targets is not None: raise exceptions.RepositoryError elif role_name == "targets": + if self.snapshot is None: + raise exceptions.RepositoryError if len(self) > 4: # delegates have been loaded already raise exceptions.RepositoryError - else: + else: # delegated role if self.targets is None: raise exceptions.RepositoryError @@ -266,6 +289,7 @@ def _load_intermediate_root(self, data: str): ) self._bundle["root"] = new_root + logger.debug("Loaded root") def _load_timestamp(self, data: str): """Verifies 
the new timestamp and uses it as current timestamp @@ -309,6 +333,7 @@ def _load_timestamp(self, data: str): raise exceptions.ExpiredMetadataError self._bundle["timestamp"] = new_timestamp + logger.debug("Loaded timestamp") def _load_snapshot(self, data: str): if self.root is None or self.timestamp is None: @@ -316,7 +341,10 @@ def _load_snapshot(self, data: str): raise exceptions.RepositoryError # Verify against the hashes in timestamp, if any - meta = self.timestamp.signed.meta["snapshot.json"] + meta = self.timestamp.signed.meta.get("snapshot.json") + if meta is None: + raise exceptions.RepositoryError + hashes = meta.get("hashes") or {} for algo, _hash in meta["hashes"].items(): digest_object = sslib_hash.digest(algo) @@ -354,14 +382,21 @@ def _load_snapshot(self, data: str): raise exceptions.ExpiredMetadataError self._bundle["snapshot"] = new_snapshot + logger.debug("Loaded snapshot") def _load_targets(self, data: str): - if self.root is None or self.snapshot is None: - # bundle does not support this order of ops + self._load_delegated_targets(data, "targets", "root") + + def _load_delegated_targets(self, data: str, role_name: str, delegator_name: str): + logger.debug(f"Loading {role_name} delegated by {delegator_name}") + delegator = self.get(delegator_name) + if delegator == None: raise exceptions.RepositoryError # Verify against the hashes in snapshot, if any - meta = self.snapshot.signed.meta["targets.json"] + meta = self.snapshot.signed.meta.get(f"{role_name}.json") + if meta is None: + raise exceptions.RepositoryError hashes = meta.get("hashes") or {} for algo, _hash in hashes.items(): @@ -370,23 +405,20 @@ def _load_targets(self, data: str): if digest_object.hexdigest() != _hash: raise exceptions.BadHashError() - new_targets = from_string(data) - if new_targets.signed._type != "targets": + new_delegate = from_string(data) + if new_delegate.signed._type != "targets": raise exceptions.RepositoryError - if not verify_with_threshold(self.root, "targets", 
new_targets): + if not verify_with_threshold(delegator, role_name, new_delegate): raise exceptions.UnsignedMetadataError( - "New targets is not signed by root", new_targets.signed + f"New {role_name} is not signed by {delegator_name}" ) - if new_targets.signed.version != meta["version"]: + if new_delegate.signed.version != meta["version"]: raise exceptions.BadVersionNumberError - if new_targets.signed.is_expired(self.reference_time): + if new_delegate.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError - self._bundle["targets"] = new_targets - - - def _load_delegate(self, data: str, delegator_name: str = None): - pass \ No newline at end of file + self._bundle[role_name] = new_delegate + logger.debug("Loaded {role_name}") From f503ebeb17326d3d224c6a571e35edf19f6d1dfc Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 21 Apr 2021 15:52:19 +0300 Subject: [PATCH 46/86] MetadataBundle: use Metadata.from_bytes() Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 40 +++++++++++----------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index f965ec9460..978d12c9ed 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -108,14 +108,6 @@ def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metad return len(unique_keys) >= role["threshold"] -# TODO issue 1336: implement in metadata api -from tuf.api.serialization.json import JSONDeserializer - - -def from_string(data: str) -> Metadata: - return JSONDeserializer().deserialize(data) - - class MetadataBundle(abc.Mapping): def __init__(self, path: str): """Initialize by loading root metadata from disk @@ -144,7 +136,7 @@ def load_local_metadata(self, role_name: str, delegator_name: str = None) -> boo Returns True if 'role_name' is now in the bundle """ if self.get(role_name) is not None: - logger.debug("Already loaded 
%s.json", role_name) + logger.debug("Already loaded local %s.json", role_name) return True logger.debug("Loading local %s.json", role_name) @@ -173,28 +165,28 @@ def load_local_metadata(self, role_name: str, delegator_name: str = None) -> boo # TODO delete local file (except probably should not delete root.json?) return False - def update_metadata(self, metadata_str: str, role_name: str, delegator_name: str = None): + def update_metadata(self, data: bytes, role_name: str, delegator_name: str = None): logger.debug("Updating %s", role_name) self._raise_on_unsupported_state(role_name) if role_name == "root": self.load_local_metadata("root") - self._load_intermediate_root(metadata_str) + self._load_intermediate_root(data) self.root.to_file(os.path.join(self._path, "root.json")) elif role_name == "timestamp": self.load_local_metadata("timestamp") - self._load_timestamp(metadata_str) + self._load_timestamp(data) self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) elif role_name == "snapshot": self.load_local_metadata("snapshot") - self._load_snapshot(metadata_str) + self._load_snapshot(data) self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) elif role_name == "targets": - self._load_targets(metadata_str) + self._load_targets(data) self.targets.to_file(os.path.join(self._path, "targets.json")) else: - self._load_delegated_targets(metadata_str, role_name, delegator_name) + self._load_delegated_targets(data, role_name, delegator_name) self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) def root_update_finished(self): @@ -262,12 +254,12 @@ def snapshot(self): def targets(self): return self._bundle.get("targets") - def _load_intermediate_root(self, data: str): + def _load_intermediate_root(self, data: bytes): """Verify the new root using current root (if any) and use it as current root Raises if root fails verification """ - new_root = from_string(data) + new_root = Metadata.from_bytes(data) if new_root.signed._type != "root": 
raise exceptions.RepositoryError @@ -291,7 +283,7 @@ def _load_intermediate_root(self, data: str): self._bundle["root"] = new_root logger.debug("Loaded root") - def _load_timestamp(self, data: str): + def _load_timestamp(self, data: bytes): """Verifies the new timestamp and uses it as current timestamp Raises if verification fails @@ -300,7 +292,7 @@ def _load_timestamp(self, data: str): # bundle does not support this order of ops raise exceptions.RepositoryError - new_timestamp = from_string(data) + new_timestamp = Metadata.from_bytes(data) if new_timestamp.signed._type != "timestamp": raise exceptions.RepositoryError @@ -335,7 +327,7 @@ def _load_timestamp(self, data: str): self._bundle["timestamp"] = new_timestamp logger.debug("Loaded timestamp") - def _load_snapshot(self, data: str): + def _load_snapshot(self, data: bytes): if self.root is None or self.timestamp is None: # bundle does not support this order of ops raise exceptions.RepositoryError @@ -351,7 +343,7 @@ def _load_snapshot(self, data: str): digest_object.update(data) if digest_object.hexdigest() != _hash: raise exceptions.BadHashError() - new_snapshot = from_string(data) + new_snapshot = Metadata.from_bytes(data) if new_snapshot.signed._type != "snapshot": raise exceptions.RepositoryError @@ -384,10 +376,10 @@ def _load_snapshot(self, data: str): self._bundle["snapshot"] = new_snapshot logger.debug("Loaded snapshot") - def _load_targets(self, data: str): + def _load_targets(self, data: bytes): self._load_delegated_targets(data, "targets", "root") - def _load_delegated_targets(self, data: str, role_name: str, delegator_name: str): + def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): logger.debug(f"Loading {role_name} delegated by {delegator_name}") delegator = self.get(delegator_name) if delegator == None: @@ -405,7 +397,7 @@ def _load_delegated_targets(self, data: str, role_name: str, delegator_name: str if digest_object.hexdigest() != _hash: raise 
exceptions.BadHashError() - new_delegate = from_string(data) + new_delegate = Metadata.from_bytes(data) if new_delegate.signed._type != "targets": raise exceptions.RepositoryError From c1afe57ae846e34237d4acef7e1fa9a88ab9504f Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 22 Apr 2021 10:13:02 +0300 Subject: [PATCH 47/86] MetadataBundle: Require load_local_metadata() Change the rules a bit: require calling load_local_metadata() before update_metadata() is legal. This removes the need to run load_local_metadata() "just to be sure" in update_metadata() Improve comments Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 76 ++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 978d12c9ed..65645d67dd 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -9,13 +9,23 @@ care of persisting valid metadata on disk, loading local metadata from disk and deleting invalid local metadata. -New metadata (downloaded from a remote repository) can be loaded using -'update_metadata()'. 
The type of accepted metadata depends on bundle state: - * Metadata is loadable only if all metadata it depends on is loaded - * Metadata is immutable if any metadata depending on it has been loaded +Loaded metadata can be accessed via the index access with rolename as key +or, in the case of top-level metadata using the helper properties like +'MetadataBundle.root' + +Metadata can be loaded into bundle by two means: + * loading from local storage: load_local_metadata() + (and, in the case of root metadata, the constuctor) + * updating from remote repository: update_metadata() + +The rules for top-level metadata are + * Metadata is loadable only if metadata it depends on is loaded + * Metadata is immutable if any metadata depending on it has been loaded + * Loading from local storage must be attempted before updating from remote + * Updating from remote is never required Exceptions are raised if metadata fails to load in any way (except in the -case of local loads -- see locad_local_metadata()). +case of local loads -- see load_local_metadata()). Example (with hypothetical download function): @@ -52,14 +62,24 @@ TODO: - * Delegated targets not implement yet + * Delegated targets are implemented but they are not covered + by same immutability guarantees: the top-level metadata is handled + by hard-coded rules (can't update root if snapshot is loaded) + but delegations would require storing the delegation tree ... * exceptions are all over the place and not thought out at all + * usefulness of root_update_finished() can be debated: it could be done + in the beginning of _load_timestamp()... + * there are some divergences from spec: + * 5.3.11: timestamp and snapshot are not deleted right away (only on next load): + the load functions will refuse to load the files when they are not signed by + current root keys. Deleting at the specified point is possible but means additional + code with some quirks.. * a bit of repetition * No tests! * Naming maybe not final? 
* some metadata interactions might work better in Metadata itself * Progress through Specification update process should be documented - (not sure yet how) + (not sure yet how: maybe a spec_logger that logs specification events?) """ from collections import abc @@ -79,16 +99,16 @@ # This is a placeholder until ... # TODO issue 1306: implement this in Metadata API def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metadata): - if delegator.signed._type == 'root': + if delegator.signed._type == "root": keys = delegator.signed.keys role = delegator.signed.roles.get(role_name) - elif delegator.signed._type == 'targets': + elif delegator.signed._type == "targets": keys = delegator.signed.delegations["keys"] # role names are unique: first match is enough roles = delegator.signed.delegations["roles"] role = next((role for role in roles if role["name"] == role_name), None) else: - raise ValueError('Call is valid only on delegator metadata') + raise ValueError("Call is valid only on delegator metadata") if role is None: raise exceptions.UnknownRoleError @@ -110,10 +130,10 @@ def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metad class MetadataBundle(abc.Mapping): def __init__(self, path: str): - """Initialize by loading root metadata from disk - """ + """Initialize by loading root metadata from disk""" self._path = path self._bundle = {} # type: Dict[str: Metadata] + self._local_load_attempted = {} self.reference_time = None if not os.path.exists(path): @@ -127,12 +147,14 @@ def __init__(self, path: str): def load_local_metadata(self, role_name: str, delegator_name: str = None) -> bool: """Loads metadata from local storage and inserts into bundle - + If bundle already contains 'role_name', nothing is loaded. Failure to read the file, failure to parse it and failure to load it as valid metadata will not raise exceptions: the function will just fail. 
+ Raises if 'role_name' cannot be loaded from local storage at this state + Returns True if 'role_name' is now in the bundle """ if self.get(role_name) is not None: @@ -142,16 +164,17 @@ def load_local_metadata(self, role_name: str, delegator_name: str = None) -> boo logger.debug("Loading local %s.json", role_name) self._raise_on_unsupported_state(role_name) + self._local_load_attempted[role_name] = True try: with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: data = f.read() - + if role_name == "root": self._load_intermediate_root(data) - elif role_name == "timestamp": + elif role_name == "timestamp": self._load_timestamp(data) - elif role_name == "snapshot": + elif role_name == "snapshot": self._load_snapshot(data) elif role_name == "targets": self._load_targets(data) @@ -166,20 +189,26 @@ def load_local_metadata(self, role_name: str, delegator_name: str = None) -> boo return False def update_metadata(self, data: bytes, role_name: str, delegator_name: str = None): + """Takes new metadata (from remote repository) and loads it into bundle + + Raises if 'role_name' cannot be update from remote at this state + Raises if 'data' cannot be parsed or validated + Raises if the new metadata cannot be verified by the bundle + """ logger.debug("Updating %s", role_name) self._raise_on_unsupported_state(role_name) + if not self._local_load_attempted.get(role_name): + raise exceptions.RepositoryError + if role_name == "root": - self.load_local_metadata("root") self._load_intermediate_root(data) self.root.to_file(os.path.join(self._path, "root.json")) elif role_name == "timestamp": - self.load_local_metadata("timestamp") self._load_timestamp(data) self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) elif role_name == "snapshot": - self.load_local_metadata("snapshot") self._load_snapshot(data) self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) elif role_name == "targets": @@ -190,6 +219,11 @@ def update_metadata(self, data: bytes, 
role_name: str, delegator_name: str = Non self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) def root_update_finished(self): + """Marks root update as finished, validates the root metadata + + Raises if root update is not a valid operation at this state + Raises if validation fails + """ if self.timestamp is not None: # bundle does not support this order of ops raise exceptions.RepositoryError @@ -223,7 +257,7 @@ def _raise_on_unsupported_state(self, role_name: str): if len(self) > 4: # delegates have been loaded already raise exceptions.RepositoryError - else: # delegated role + else: # delegated role if self.targets is None: raise exceptions.RepositoryError @@ -413,4 +447,4 @@ def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s raise exceptions.ExpiredMetadataError self._bundle[role_name] = new_delegate - logger.debug("Loaded {role_name}") + logger.debug(f"Loaded {role_name}") From e7a0febe1bfe28cccb18fb65037a00dfa1050b16 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 22 Apr 2021 18:42:33 +0300 Subject: [PATCH 48/86] Improve loading rules checks Make sure we do not load any metadata if delegates of that metadata are already loaded. Also remove duplicate checks. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 65645d67dd..e11c45e13f 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -237,9 +237,12 @@ def root_update_finished(self): def _raise_on_unsupported_state(self, role_name: str): """Raise if updating 'role_name' is not supported at this state""" + + # Special rules for top-level roles. 
We want to enforce a strict order + # root->snapshot->timestamp->targets where loading a metadata is no + # longer allowed when the next metadata in the order has been loaded if role_name == "root": - if self.timestamp is not None: - raise exceptions.RepositoryError + pass elif role_name == "timestamp": if self.reference_time is None: # root_update_finished() not called @@ -254,13 +257,19 @@ def _raise_on_unsupported_state(self, role_name: str): elif role_name == "targets": if self.snapshot is None: raise exceptions.RepositoryError - if len(self) > 4: - # delegates have been loaded already - raise exceptions.RepositoryError else: # delegated role if self.targets is None: raise exceptions.RepositoryError + # Generic rule: Updating a role is not allowed if + # * role is already loaded AND + # * role has a delegate that is already loaded + role = self.get(role_name) + if role is not None and role.signed.delegations is not None: + for delegate in role.signed.delegations["roles"]: + if self.get(delegate["name"]) is not None: + raise exceptions.RepositoryError + # Implement Mapping def __getitem__(self, key: str): return self._bundle[key] @@ -322,10 +331,6 @@ def _load_timestamp(self, data: bytes): Raises if verification fails """ - if self.root is None: - # bundle does not support this order of ops - raise exceptions.RepositoryError - new_timestamp = Metadata.from_bytes(data) if new_timestamp.signed._type != "timestamp": raise exceptions.RepositoryError @@ -362,9 +367,6 @@ def _load_timestamp(self, data: bytes): logger.debug("Loaded timestamp") def _load_snapshot(self, data: bytes): - if self.root is None or self.timestamp is None: - # bundle does not support this order of ops - raise exceptions.RepositoryError # Verify against the hashes in timestamp, if any meta = self.timestamp.signed.meta.get("snapshot.json") From cadbd844500822965346bb09814f37ef43ca7654 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 22 Apr 2021 19:08:50 +0300 Subject: [PATCH 49/86] 
Comment and variable name improvements Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index e11c45e13f..dcd16ca99a 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -62,10 +62,6 @@ TODO: - * Delegated targets are implemented but they are not covered - by same immutability guarantees: the top-level metadata is handled - by hard-coded rules (can't update root if snapshot is loaded) - but delegations would require storing the delegation tree ... * exceptions are all over the place and not thought out at all * usefulness of root_update_finished() can be debated: it could be done in the beginning of _load_timestamp()... @@ -129,14 +125,14 @@ def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metad class MetadataBundle(abc.Mapping): - def __init__(self, path: str): + def __init__(self, repository_path: str): """Initialize by loading root metadata from disk""" - self._path = path + self._path = repository_path self._bundle = {} # type: Dict[str: Metadata] self._local_load_attempted = {} self.reference_time = None - if not os.path.exists(path): + if not os.path.exists(self._path): # TODO try to create dir instead? 
raise exceptions.RepositoryError("Repository does not exist") @@ -353,7 +349,7 @@ def _load_timestamp(self, data: bytes): new_timestamp.signed.meta["snapshot.json"]["version"] < self.timestamp.signed.meta["snapshot.json"]["version"] ): - # TODO not sure about the + # TODO not sure about the correct exception here raise exceptions.ReplayedMetadataError( "snapshot", new_timestamp.signed.meta["snapshot.json"]["version"], From 7e457ec98c0b9f831f9504f5398311759a08d830 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 27 Apr 2021 11:26:42 +0300 Subject: [PATCH 50/86] Exceptions refactoring Make more exceptions derive from RepositoryError: The idea is that a client can just handle RepositoryError and will know that any problems resulting from unexpected metadata will end up there. Also fix some wildly misleading variable naming in ReplayedMetadataError Signed-off-by: Jussi Kukkonen --- tuf/exceptions.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tuf/exceptions.py b/tuf/exceptions.py index baed5446eb..c5f795e0c6 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -89,12 +89,6 @@ def __repr__(self): # repr(self.observed_hash) + ')') - - -class BadVersionNumberError(Error): - """Indicate an error for metadata that contains an invalid version number.""" - - class BadPasswordError(Error): """Indicate an error after encountering an invalid password.""" @@ -107,6 +101,10 @@ class RepositoryError(Error): """Indicate an error with a repository's state, such as a missing file.""" +class BadVersionNumberError(RepositoryError): + """Indicate an error for metadata that contains an invalid version number.""" + + class MissingLocalRepositoryError(RepositoryError): """Raised when a local repository could not be found.""" @@ -119,36 +117,29 @@ class ForbiddenTargetError(RepositoryError): """Indicate that a role signed for a target that it was not delegated to.""" -class ExpiredMetadataError(Error): +class 
ExpiredMetadataError(RepositoryError): """Indicate that a TUF Metadata file has expired.""" class ReplayedMetadataError(RepositoryError): """Indicate that some metadata has been replayed to the client.""" - def __init__(self, metadata_role, previous_version, current_version): + def __init__(self, metadata_role, downloaded_version, current_version): super(ReplayedMetadataError, self).__init__() self.metadata_role = metadata_role - self.previous_version = previous_version + self.downloaded_version = downloaded_version self.current_version = current_version - def __str__(self): return ( 'Downloaded ' + repr(self.metadata_role) + ' is older (' + - repr(self.previous_version) + ') than the version currently ' + repr(self.downloaded_version) + ') than the version currently ' 'installed (' + repr(self.current_version) + ').') def __repr__(self): return self.__class__.__name__ + ' : ' + str(self) - # # Directly instance-reproducing: - # return ( - # self.__class__.__name__ + '(' + repr(self.metadata_role) + ', ' + - # repr(self.previous_version) + ', ' + repr(self.current_version) + ')') - - class CryptoError(Error): """Indicate any cryptography-related errors.""" @@ -250,7 +241,7 @@ class InvalidNameError(Error): """Indicate an error while trying to validate any type of named object.""" -class UnsignedMetadataError(Error): +class UnsignedMetadataError(RepositoryError): """Indicate metadata object with insufficient threshold of signatures.""" def __init__(self, message, signable): From 53f5ccb58a752dc6917b1c57ecb44a24cc8eefdd Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 27 Apr 2021 12:20:20 +0300 Subject: [PATCH 51/86] MetadataBundle: Exception refactor The goal is that the bundle only raises two kinds of errors: * user errors (ValueError/KeyError) that can be avoided by the caller * RepositoryErrors that are a result of unacceptable metadata: The requested action cannot succeed because of the metadata. These typically cannot be avoided by the caller. 
File open and serialization errors are handled internally. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 134 ++++++++++++++++++--------- 1 file changed, 92 insertions(+), 42 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index dcd16ca99a..f41b1d8e1d 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -89,6 +89,7 @@ from tuf import exceptions from tuf.api.metadata import Metadata +from tuf.api.serialization import SerializationError logger = logging.getLogger(__name__) @@ -178,9 +179,8 @@ def load_local_metadata(self, role_name: str, delegator_name: str = None) -> boo self._load_delegated_targets(data, role_name, delegator_name) return True - except Exception as e: - # TODO only handle specific errors - logger.debug("Failed to load local %s.json", role_name) + except (OSError, exceptions.RepositoryError) as e: + logger.debug("Failed to load local %s: %s", role_name, e) # TODO delete local file (except probably should not delete root.json?) 
return False @@ -196,7 +196,9 @@ def update_metadata(self, data: bytes, role_name: str, delegator_name: str = Non self._raise_on_unsupported_state(role_name) if not self._local_load_attempted.get(role_name): - raise exceptions.RepositoryError + raise ValueError( + f"Cannot update {role_name} before loading local metadata" + ) if role_name == "root": self._load_intermediate_root(data) @@ -220,14 +222,13 @@ def root_update_finished(self): Raises if root update is not a valid operation at this state Raises if validation fails """ - if self.timestamp is not None: - # bundle does not support this order of ops - raise exceptions.RepositoryError + if self.timestamp is None: + raise ValueError("Root update is already finished") # Store our reference "now", verify root expiry self.reference_time = datetime.utcnow() if self.root.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError + raise exceptions.ExpiredMetadataError("New root.json is expired") logger.debug("Verified final root.json") @@ -242,20 +243,22 @@ def _raise_on_unsupported_state(self, role_name: str): elif role_name == "timestamp": if self.reference_time is None: # root_update_finished() not called - raise exceptions.RepositoryError + raise ValueError("Cannot load timestamp before root") if self.snapshot is not None: - raise exceptions.RepositoryError + raise ValueError("Cannot load timestamp after snapshot") elif role_name == "snapshot": if self.timestamp is None: - raise exceptions.RepositoryError + raise ValueError("Cannot load snapshot before timestamp") if self.targets is not None: - raise exceptions.RepositoryError + raise ValueError("Cannot load snapshot after targets") elif role_name == "targets": if self.snapshot is None: - raise exceptions.RepositoryError - else: # delegated role + raise ValueError("Cannot load targets before snapshot") + else: if self.targets is None: - raise exceptions.RepositoryError + raise ValueError( + "Cannot load delegated targets before targets" + ) # 
Generic rule: Updating a role is not allowed if # * role is already loaded AND @@ -263,8 +266,12 @@ def _raise_on_unsupported_state(self, role_name: str): role = self.get(role_name) if role is not None and role.signed.delegations is not None: for delegate in role.signed.delegations["roles"]: - if self.get(delegate["name"]) is not None: - raise exceptions.RepositoryError + delegate_name = delegate["name"] + if self.get(delegate_name) is not None: + raise ValueError( + f"Cannot load {role_name} after delegate" + f"{delegate_name}" + ) # Implement Mapping def __getitem__(self, key: str): @@ -298,9 +305,15 @@ def _load_intermediate_root(self, data: bytes): Raises if root fails verification """ - new_root = Metadata.from_bytes(data) + try: + new_root = Metadata.from_bytes(data) + except SerializationError as e: + raise exceptions.RepositoryError("Failed to load root") from e + if new_root.signed._type != "root": - raise exceptions.RepositoryError + raise exceptions.RepositoryError( + f"Expected 'root', got '{new_root.signed._type}'" + ) if self.root is not None: if not verify_with_threshold(self.root, "root", new_root): @@ -327,9 +340,15 @@ def _load_timestamp(self, data: bytes): Raises if verification fails """ - new_timestamp = Metadata.from_bytes(data) + try: + new_timestamp = Metadata.from_bytes(data) + except SerializationError as e: + raise exceptions.RepositoryError("Failed to load timestamp") from e + if new_timestamp.signed._type != "timestamp": - raise exceptions.RepositoryError + raise exceptions.RepositoryError( + f"Expected 'timestamp', got '{new_timestamp.signed._type}'" + ) if not verify_with_threshold(self.root, "timestamp", new_timestamp): raise exceptions.UnsignedMetadataError( @@ -357,27 +376,33 @@ def _load_timestamp(self, data: bytes): ) if new_timestamp.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError + raise exceptions.ExpiredMetadataError("New timestamp is expired") self._bundle["timestamp"] = new_timestamp 
logger.debug("Loaded timestamp") def _load_snapshot(self, data: bytes): - # Verify against the hashes in timestamp, if any - meta = self.timestamp.signed.meta.get("snapshot.json") - if meta is None: - raise exceptions.RepositoryError + meta = self.timestamp.signed.meta["snapshot.json"] + # Verify against the hashes in timestamp, if any hashes = meta.get("hashes") or {} for algo, _hash in meta["hashes"].items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) - if digest_object.hexdigest() != _hash: - raise exceptions.BadHashError() - new_snapshot = Metadata.from_bytes(data) + observed_hash = digest_object.hexdigest() + if observed_hash != _hash: + raise exceptions.BadHashError(_hash, observed_hash) + + try: + new_snapshot = Metadata.from_bytes(data) + except SerializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e + if new_snapshot.signed._type != "snapshot": - raise exceptions.RepositoryError + raise exceptions.RepositoryError( + f"Expected 'snapshot', got '{new_snapshot.signed._type}'" + ) if not verify_with_threshold(self.root, "snapshot", new_snapshot): raise exceptions.UnsignedMetadataError( @@ -388,7 +413,11 @@ def _load_snapshot(self, data: bytes): new_snapshot.signed.version != self.timestamp.signed.meta["snapshot.json"]["version"] ): - raise exceptions.BadVersionNumberError + raise exceptions.BadVersionNumberError( + f"Expected snapshot version" + f"{self.timestamp.signed.meta['snapshot.json']['version']}," + f"got {new_snapshot.signed.version}" + ) if self.snapshot: for filename, fileinfo in self.snapshot.signed.meta.items(): @@ -396,14 +425,19 @@ def _load_snapshot(self, data: bytes): # Prevent removal of any metadata in meta if new_fileinfo is None: - raise exceptions.ReplayedMetadataError + raise exceptions.RepositoryError( + f"New snapshot is missing info for '{filename}'" + ) # Prevent rollback of any metadata versions if new_fileinfo["version"] < fileinfo["version"]: - raise 
exceptions.ReplayedMetadataError + raise exceptions.BadVersionNumberError( + f"Expected {filename} version" + f"{new_fileinfo['version']}, got {fileinfo['version']}" + ) if new_snapshot.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError + raise exceptions.ExpiredMetadataError("New snapshot is expired") self._bundle["snapshot"] = new_snapshot logger.debug("Loaded snapshot") @@ -413,25 +447,38 @@ def _load_targets(self, data: bytes): def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): logger.debug(f"Loading {role_name} delegated by {delegator_name}") + delegator = self.get(delegator_name) + # TODO this check could maybe be done in _raise_on_unspported_state if delegator == None: - raise exceptions.RepositoryError + raise exceptions.ValueError( + "Cannot load delegated target before delegator" + ) # Verify against the hashes in snapshot, if any meta = self.snapshot.signed.meta.get(f"{role_name}.json") if meta is None: - raise exceptions.RepositoryError + raise exceptions.RepositoryError( + f"Snapshot does not contain information for '{role_name}'" + ) hashes = meta.get("hashes") or {} for algo, _hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) - if digest_object.hexdigest() != _hash: - raise exceptions.BadHashError() + observed_hash = digest_object.hexdigest() + if observed_hash != _hash: + raise exceptions.BadHashError(_hash, observed_hash) + + try: + new_delegate = Metadata.from_bytes(data) + except SerializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e - new_delegate = Metadata.from_bytes(data) if new_delegate.signed._type != "targets": - raise exceptions.RepositoryError + raise exceptions.RepositoryError( + f"Expected 'targets', got '{new_delegate.signed._type}'" + ) if not verify_with_threshold(delegator, role_name, new_delegate): raise exceptions.UnsignedMetadataError( @@ -439,10 +486,13 @@ def 
_load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s ) if new_delegate.signed.version != meta["version"]: - raise exceptions.BadVersionNumberError + raise exceptions.BadVersionNumberError( + f"Expected {role_name} version" + f"{meta['version']}, got {new_delegate.signed.version}" + ) if new_delegate.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError + raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") self._bundle[role_name] = new_delegate logger.debug(f"Loaded {role_name}") From 71793b61ecfda8c53d8c828d31c354d22ebc132a Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 4 May 2021 19:23:36 +0300 Subject: [PATCH 52/86] Update MetadataBundle with named methods Use named methods for the top-level metadata (e.g. load_local_timestamp() instead of load_local_metadata("timestamp")). This can now happen after the mirrors are no longer in play (so updater can easily call named methods) and makes the bundle implementation more straight-forward. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 316 +++++++++++++-------------- 1 file changed, 151 insertions(+), 165 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index f41b1d8e1d..3ebb187d5b 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -13,63 +13,65 @@ or, in the case of top-level metadata using the helper properties like 'MetadataBundle.root' -Metadata can be loaded into bundle by two means: - * loading from local storage: load_local_metadata() - (and, in the case of root metadata, the constuctor) - * updating from remote repository: update_metadata() - The rules for top-level metadata are * Metadata is loadable only if metadata it depends on is loaded * Metadata is immutable if any metadata depending on it has been loaded - * Loading from local storage must be attempted before updating from remote - * Updating from remote is never required + * Caller must load/update these in order: + root -> timestamp -> snapshot -> targets -> (other delegated targets) + * Caller should try loading local file before updating metadata from remote -Exceptions are raised if metadata fails to load in any way (except in the -case of local loads -- see load_local_metadata()). +Exceptions are raised if metadata fails to load in any way. The exception +to this is local loads -- only local root metadata needs to be valid: +other local metadata is allowed to be invalid (e.g. no longer signed): +it won't be loaded but there will not be an exception. 
Example (with hypothetical download function): >>> # Load local root >>> bundle = MetadataBundle("path/to/metadata") >>> ->>> # load more root versions from remote ->>> with download("root", bundle.root.signed.version + 1) as f: ->>> bundle.update_metadata(f.read()) +>>> # update root until no more are available from remote >>> with download("root", bundle.root.signed.version + 1) as f: ->>> bundle.update_metadata(f.read()) ->>> ->>> # Finally, no more roots from remote +>>> bundle.update_root(f.read()) +>>> # ... >>> bundle.root_update_finished() >>> ->>> # load local timestamp, then update it ->>> bundle.load_local_metadata("timestamp") +>>> # load timestamp, then update from remote +>>> bundle.load_local_timestamp() >>> with download("timestamp") as f: ->>> bundle.update_metadata(f.read()) +>>> bundle.update_timestamp(f.read()) >>> ->>> # load local snapshot, then update it if needed ->>> if not bundle.load_local_metadata("snapshot"): ->>> # load snapshot (consistent snapshot not shown) ->>> with download("snapshot") as f: ->>> bundle.update_metadata(f.read()) +>>> # load snapshot, update from remote if needed +>>> if not bundle.load_local_snapshot(): +>>> # TODO get version from timestamp +>>> with download("snapshot", version) as f: +>>> bundle.update_snapshot(f.read()) >>> ->>> # load local targets, then update it if needed ->>> if not bundle.load_local_metadata("targets"): ->>> version = bundle.snapshot.signed.meta["targets.json"]["version"] ->>> with download("snapshot", version + 1) as f: ->>> bundle.update_metadata(f.read()) +>>> # load local targets, update from remote if needed +>>> if not bundle.load_local_targets(): +>>> # TODO get version from snapshot +>>> with download("targets", version) as f: +>>> bundle.update_targets(f.read()) >>> ->>> # Top level metadata is now fully loaded and verified +>>> # load local delegated role, update from remote if needed +>>> if not bundle.load_local_delegated_targets("rolename", "targets"): +>>> # TODO get version 
from snapshot +>>> with download("rolename", version) as f: +>>> bundle.update_targets(f.read(), "rolename", "targets") TODO: - * exceptions are all over the place and not thought out at all + * exceptions are all over the place: the idea is that client could just handle a + generic RepositoryError that covers every issue that server provided metadata + could inflict (other errors would be user errors), but this is not yet the case * usefulness of root_update_finished() can be debated: it could be done - in the beginning of _load_timestamp()... + in the beginning of load_timestamp()... * there are some divergences from spec: * 5.3.11: timestamp and snapshot are not deleted right away (only on next load): the load functions will refuse to load the files when they are not signed by current root keys. Deleting at the specified point is possible but means additional code with some quirks.. + * in general local metadata files are not deleted (they just won't succesfully load) * a bit of repetition * No tests! * Naming maybe not final? @@ -130,7 +132,6 @@ def __init__(self, repository_path: str): """Initialize by loading root metadata from disk""" self._path = repository_path self._bundle = {} # type: Dict[str: Metadata] - self._local_load_attempted = {} self.reference_time = None if not os.path.exists(self._path): @@ -139,90 +140,49 @@ def __init__(self, repository_path: str): # Load and validate the local root metadata # Valid root metadata is required - if not self.load_local_metadata("root"): + logger.debug("Loading local root") + try: + with open(os.path.join(self._path, "root.json"), "rb") as f: + self._load_intermediate_root(f.read()) + except (OSError, exceptions.RepositoryError) as e: raise exceptions.RepositoryError("Failed to load local root metadata") - def load_local_metadata(self, role_name: str, delegator_name: str = None) -> bool: - """Loads metadata from local storage and inserts into bundle - - If bundle already contains 'role_name', nothing is loaded. 
- Failure to read the file, failure to parse it and failure to - load it as valid metadata will not raise exceptions: the function - will just fail. - - Raises if 'role_name' cannot be loaded from local storage at this state - - Returns True if 'role_name' is now in the bundle - """ - if self.get(role_name) is not None: - logger.debug("Already loaded local %s.json", role_name) - return True - - logger.debug("Loading local %s.json", role_name) + # Implement Mapping + def __getitem__(self, key: str): + return self._bundle[key] - self._raise_on_unsupported_state(role_name) - self._local_load_attempted[role_name] = True + def __len__(self): + return len(self._bundle) - try: - with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: - data = f.read() - - if role_name == "root": - self._load_intermediate_root(data) - elif role_name == "timestamp": - self._load_timestamp(data) - elif role_name == "snapshot": - self._load_snapshot(data) - elif role_name == "targets": - self._load_targets(data) - else: - self._load_delegated_targets(data, role_name, delegator_name) + def __iter__(self): + return iter(self._bundle) - return True - except (OSError, exceptions.RepositoryError) as e: - logger.debug("Failed to load local %s: %s", role_name, e) - # TODO delete local file (except probably should not delete root.json?) 
- return False + # Helper properties for top level metadata + @property + def root(self): + return self._bundle.get("root") - def update_metadata(self, data: bytes, role_name: str, delegator_name: str = None): - """Takes new metadata (from remote repository) and loads it into bundle + @property + def timestamp(self): + return self._bundle.get("timestamp") - Raises if 'role_name' cannot be update from remote at this state - Raises if 'data' cannot be parsed or validated - Raises if the new metadata cannot be verified by the bundle - """ - logger.debug("Updating %s", role_name) + @property + def snapshot(self): + return self._bundle.get("snapshot") - self._raise_on_unsupported_state(role_name) + @property + def targets(self): + return self._bundle.get("targets") - if not self._local_load_attempted.get(role_name): - raise ValueError( - f"Cannot update {role_name} before loading local metadata" - ) + # Public methods + def update_root(self, data: bytes): + logger.debug("Updating root") - if role_name == "root": - self._load_intermediate_root(data) - self.root.to_file(os.path.join(self._path, "root.json")) - elif role_name == "timestamp": - self._load_timestamp(data) - self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) - elif role_name == "snapshot": - self._load_snapshot(data) - self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) - elif role_name == "targets": - self._load_targets(data) - self.targets.to_file(os.path.join(self._path, "targets.json")) - else: - self._load_delegated_targets(data, role_name, delegator_name) - self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) + self._load_intermediate_root(data) + self.root.to_file(os.path.join(self._path, "root.json")) def root_update_finished(self): - """Marks root update as finished, validates the root metadata - - Raises if root update is not a valid operation at this state - Raises if validation fails - """ - if self.timestamp is None: + if self.reference_time is not 
None: raise ValueError("Root update is already finished") # Store our reference "now", verify root expiry @@ -232,79 +192,82 @@ def root_update_finished(self): logger.debug("Verified final root.json") - def _raise_on_unsupported_state(self, role_name: str): - """Raise if updating 'role_name' is not supported at this state""" + def load_local_timestamp(self): + logger.debug("Loading local timestamp") - # Special rules for top-level roles. We want to enforce a strict order - # root->snapshot->timestamp->targets where loading a metadata is no - # longer allowed when the next metadata in the order has been loaded - if role_name == "root": - pass - elif role_name == "timestamp": - if self.reference_time is None: - # root_update_finished() not called - raise ValueError("Cannot load timestamp before root") - if self.snapshot is not None: - raise ValueError("Cannot load timestamp after snapshot") - elif role_name == "snapshot": - if self.timestamp is None: - raise ValueError("Cannot load snapshot before timestamp") - if self.targets is not None: - raise ValueError("Cannot load snapshot after targets") - elif role_name == "targets": - if self.snapshot is None: - raise ValueError("Cannot load targets before snapshot") - else: - if self.targets is None: - raise ValueError( - "Cannot load delegated targets before targets" - ) + try: + with open(os.path.join(self._path, "timestamp.json"), "rb") as f: + self._load_timestamp(f.read()) + return True + except (OSError, exceptions.RepositoryError) as e: + logger.debug("Failed to load local timestamp: %s", e) + return False - # Generic rule: Updating a role is not allowed if - # * role is already loaded AND - # * role has a delegate that is already loaded - role = self.get(role_name) - if role is not None and role.signed.delegations is not None: - for delegate in role.signed.delegations["roles"]: - delegate_name = delegate["name"] - if self.get(delegate_name) is not None: - raise ValueError( - f"Cannot load {role_name} after 
delegate" - f"{delegate_name}" - ) + def update_timestamp(self, data: bytes): + logger.debug("Updating timestamp") - # Implement Mapping - def __getitem__(self, key: str): - return self._bundle[key] + self._load_timestamp(data) + self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) - def __len__(self): - return len(self._bundle) + def load_local_snapshot(self): + logger.debug("Loading local snapshot") - def __iter__(self): - return iter(self._bundle) + try: + with open(os.path.join(self._path, "snapshot.json"), "rb") as f: + self._load_snapshot(f.read()) + return True + except (OSError, exceptions.RepositoryError) as e: + logger.debug("Failed to load local snapshot: %s", e) + return False - # Helper properties for top level metadata - @property - def root(self): - return self._bundle.get("root") + def update_snapshot(self, data: bytes): + logger.debug("Updating snapshot") - @property - def timestamp(self): - return self._bundle.get("timestamp") + self._load_snapshot(data) + self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) - @property - def snapshot(self): - return self._bundle.get("snapshot") + def load_local_targets(self): + logger.debug("Loading local targets") - @property - def targets(self): - return self._bundle.get("targets") + try: + with open(os.path.join(self._path, "targets.json"), "rb") as f: + self._load_targets(f.read()) + return True + except (OSError, exceptions.RepositoryError) as e: + logger.debug("Failed to load local targets: %s", e) + return False + + def update_targets(self, data: bytes): + logger.debug("Updating targets") + + self._load_targets(data) + self.targets.to_file(os.path.join(self._path, "targets.json")) + + def load_local_delegated_targets(self, role_name: str, delegator_name: str): + logger.debug("Loading local %s", role_name) + + try: + with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: + self._load_delegated_targets(f.read(), role_name, delegator_name) + return True + except 
(OSError, exceptions.RepositoryError) as e: + logger.debug("Failed to load local %s: %s", role_name, e) + return False + + def update_delegated_targets(self, data: bytes, role_name: str, delegator_name: str = None): + logger.debug("Updating %s", role_name) + + self._load_delegated_targets(data, role_name, delegator_name) + self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) def _load_intermediate_root(self, data: bytes): """Verify the new root using current root (if any) and use it as current root Raises if root fails verification """ + if self.reference_time is not None: + raise ValueError("Cannot update root after root update is finished") + try: new_root = Metadata.from_bytes(data) except SerializationError as e: @@ -340,6 +303,12 @@ def _load_timestamp(self, data: bytes): Raises if verification fails """ + if self.reference_time is None: + # root_update_finished() not called + raise ValueError("Cannot update timestamp before root") + if self.snapshot is not None: + raise ValueError("Cannot update timestamp after snapshot") + try: new_timestamp = Metadata.from_bytes(data) except SerializationError as e: @@ -382,6 +351,15 @@ def _load_timestamp(self, data: bytes): logger.debug("Loaded timestamp") def _load_snapshot(self, data: bytes): + """Verifies the new snapshot and uses it as current snapshot + + Raises if verification fails + """ + + if self.timestamp is None: + raise ValueError("Cannot update snapshot before timestamp") + if self.targets is not None: + raise ValueError("Cannot update snapshot after targets") meta = self.timestamp.signed.meta["snapshot.json"] @@ -443,13 +421,21 @@ def _load_snapshot(self, data: bytes): logger.debug("Loaded snapshot") def _load_targets(self, data: bytes): + """Verifies the new targets and uses it as current targets + + Raises if verification fails + """ + if self.snapshot is None: + raise ValueError("Cannot load targets before snapshot") + self._load_delegated_targets(data, "targets", "root") def 
_load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): - logger.debug(f"Loading {role_name} delegated by {delegator_name}") + """Verifies the new delegated 'role_name' and uses it as current 'role_name' + Raises if verification fails + """ delegator = self.get(delegator_name) - # TODO this check could maybe be done in _raise_on_unspported_state if delegator == None: raise exceptions.ValueError( "Cannot load delegated target before delegator" @@ -495,4 +481,4 @@ def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") self._bundle[role_name] = new_delegate - logger.debug(f"Loaded {role_name}") + logger.debug(f"Loaded {role_name} delegated by {delegator_name}") From 5596f777f897ad081319f09b7adfd1f787671d96 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 5 May 2021 21:44:20 +0300 Subject: [PATCH 53/86] MetadataBundle: Handle targets like delegated targets The public load_local_targets() and update_targets() can just call the versions for delegated targets to remove duplication. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 3ebb187d5b..dcc52ce3b3 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -227,21 +227,10 @@ def update_snapshot(self, data: bytes): self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) def load_local_targets(self): - logger.debug("Loading local targets") - - try: - with open(os.path.join(self._path, "targets.json"), "rb") as f: - self._load_targets(f.read()) - return True - except (OSError, exceptions.RepositoryError) as e: - logger.debug("Failed to load local targets: %s", e) - return False + return self.load_local_delegated_targets("targets", "root") def update_targets(self, data: bytes): - logger.debug("Updating targets") - - self._load_targets(data) - self.targets.to_file(os.path.join(self._path, "targets.json")) + self.update_delegated_targets(data, "targets", "root") def load_local_delegated_targets(self, role_name: str, delegator_name: str): logger.debug("Loading local %s", role_name) @@ -420,21 +409,14 @@ def _load_snapshot(self, data: bytes): self._bundle["snapshot"] = new_snapshot logger.debug("Loaded snapshot") - def _load_targets(self, data: bytes): - """Verifies the new targets and uses it as current targets + def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): + """Verifies the new delegated 'role_name' and uses it as current 'role_name' Raises if verification fails """ if self.snapshot is None: raise ValueError("Cannot load targets before snapshot") - self._load_delegated_targets(data, "targets", "root") - - def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): - """Verifies the new delegated 'role_name' and uses it as current 'role_name' - - Raises if verification fails - """ delegator = 
self.get(delegator_name) if delegator == None: raise exceptions.ValueError( From 766f4948d4279d9c92a17b19b7fdd1a163a42854 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 5 May 2021 21:52:08 +0300 Subject: [PATCH 54/86] MetadataBundle: Avoid loading targets twice If role is loaded already, skip loading local version (this is an optimization since there's no case where the loaded version and the on-disk version should differ). This could be done for top-level metadata as well but the situation doesn't really come up there: there's no good reasons to try loading top-level local metadata multiple times, unlike in the targets case where same delegates may be loaded when looking up different target files. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index dcc52ce3b3..b4881eb880 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -233,6 +233,10 @@ def update_targets(self, data: bytes): self.update_delegated_targets(data, "targets", "root") def load_local_delegated_targets(self, role_name: str, delegator_name: str): + if self.get(role_name): + logger.debug("Local %s already loaded", role_name) + return True + logger.debug("Loading local %s", role_name) try: From 9051358ff3d2f1c80ea59f639c3b45676b7bd2bb Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 6 May 2021 10:01:16 +0300 Subject: [PATCH 55/86] MetadataBundle: fix import name Also use only lookup dictionary once if necessary. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index b4881eb880..cf4868191b 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -86,7 +86,7 @@ import os from typing import Dict -from securesystemslib import keys as sslib_hash +from securesystemslib import hash as sslib_hash from securesystemslib import keys as sslib_keys from tuf import exceptions @@ -358,7 +358,7 @@ def _load_snapshot(self, data: bytes): # Verify against the hashes in timestamp, if any hashes = meta.get("hashes") or {} - for algo, _hash in meta["hashes"].items(): + for algo, _hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) observed_hash = digest_object.hexdigest() From 1d22d5aedca75dc6b54a40550f3ed23231fe9811 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 6 May 2021 10:24:19 +0300 Subject: [PATCH 56/86] MetadataBundle: Improve hints and docs Complete the type hints for MetadataBundle. Slightly improve documentation. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 66 +++++++++++++++++----------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index cf4868191b..1f38250191 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -84,7 +84,7 @@ from datetime import datetime import logging import os -from typing import Dict +from typing import Dict, Iterator, Optional from securesystemslib import hash as sslib_hash from securesystemslib import keys as sslib_keys @@ -97,7 +97,7 @@ # This is a placeholder until ... 
# TODO issue 1306: implement this in Metadata API -def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metadata): +def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metadata) -> bool: if delegator.signed._type == "root": keys = delegator.signed.keys role = delegator.signed.roles.get(role_name) @@ -148,40 +148,42 @@ def __init__(self, repository_path: str): raise exceptions.RepositoryError("Failed to load local root metadata") # Implement Mapping - def __getitem__(self, key: str): + def __getitem__(self, key: str) -> Metadata: return self._bundle[key] - def __len__(self): + def __len__(self) -> int: return len(self._bundle) - def __iter__(self): + def __iter__(self) -> Iterator[Metadata]: return iter(self._bundle) # Helper properties for top level metadata @property - def root(self): + def root(self) -> Optional[Metadata]: return self._bundle.get("root") @property - def timestamp(self): + def timestamp(self) -> Optional[Metadata]: return self._bundle.get("timestamp") @property - def snapshot(self): + def snapshot(self) -> Optional[Metadata]: return self._bundle.get("snapshot") @property - def targets(self): + def targets(self) -> Optional[Metadata]: return self._bundle.get("targets") # Public methods def update_root(self, data: bytes): + """Update root metadata with data from remote repository.""" logger.debug("Updating root") self._load_intermediate_root(data) self.root.to_file(os.path.join(self._path, "root.json")) def root_update_finished(self): + """Mark root metadata as final.""" if self.reference_time is not None: raise ValueError("Root update is already finished") @@ -192,7 +194,10 @@ def root_update_finished(self): logger.debug("Verified final root.json") - def load_local_timestamp(self): + def load_local_timestamp(self) -> bool: + """Load cached timestamp metadata from local storage. 
+ + Returns True if timestamp was succesfully loaded""" logger.debug("Loading local timestamp") try: @@ -204,12 +209,16 @@ def load_local_timestamp(self): return False def update_timestamp(self, data: bytes): + """Update timestamp metadata with data from remote repository.""" logger.debug("Updating timestamp") self._load_timestamp(data) self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) - def load_local_snapshot(self): + def load_local_snapshot(self) -> bool: + """Load cached snapshot metadata from local storage. + + Returns True if snapshot was succesfully loaded""" logger.debug("Loading local snapshot") try: @@ -221,18 +230,28 @@ def load_local_snapshot(self): return False def update_snapshot(self, data: bytes): + """Update snapshot metadata with data from remote repository.""" logger.debug("Updating snapshot") self._load_snapshot(data) self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) - def load_local_targets(self): + def load_local_targets(self) -> bool: + """Load cached targets metadata from local storage. + + Returns True if targets was succesfully loaded""" return self.load_local_delegated_targets("targets", "root") def update_targets(self, data: bytes): + """Update targets metadata with data from remote repository.""" self.update_delegated_targets(data, "targets", "root") - def load_local_delegated_targets(self, role_name: str, delegator_name: str): + def load_local_delegated_targets(self, role_name: str, delegator_name: str) -> bool: + """Load cached metadata for 'role_name' from local storage. + + Metadata for 'delegator_name' must be loaded already. 
+ + Returns True if metadata was succesfully loaded""" if self.get(role_name): logger.debug("Local %s already loaded", role_name) return True @@ -248,16 +267,19 @@ def load_local_delegated_targets(self, role_name: str, delegator_name: str): return False def update_delegated_targets(self, data: bytes, role_name: str, delegator_name: str = None): + """Update 'rolename' metadata with data from remote repository. + + Metadata for 'delegator_name' must be loaded already.""" logger.debug("Updating %s", role_name) self._load_delegated_targets(data, role_name, delegator_name) self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) def _load_intermediate_root(self, data: bytes): - """Verify the new root using current root (if any) and use it as current root + """Verifies and loads 'data' as new root metadata. - Raises if root fails verification - """ + Note that an expired intermediate root is considered valid: expiry is + only checked for the final root in root_update_finished().""" if self.reference_time is not None: raise ValueError("Cannot update root after root update is finished") @@ -292,10 +314,7 @@ def _load_intermediate_root(self, data: bytes): logger.debug("Loaded root") def _load_timestamp(self, data: bytes): - """Verifies the new timestamp and uses it as current timestamp - - Raises if verification fails - """ + """Verifies and loads 'data' as new timestamp metadata.""" if self.reference_time is None: # root_update_finished() not called raise ValueError("Cannot update timestamp before root") @@ -344,10 +363,7 @@ def _load_timestamp(self, data: bytes): logger.debug("Loaded timestamp") def _load_snapshot(self, data: bytes): - """Verifies the new snapshot and uses it as current snapshot - - Raises if verification fails - """ + """Verifies and loads 'data' as new snapshot metadata.""" if self.timestamp is None: raise ValueError("Cannot update snapshot before timestamp") @@ -414,7 +430,7 @@ def _load_snapshot(self, data: bytes): logger.debug("Loaded 
snapshot") def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): - """Verifies the new delegated 'role_name' and uses it as current 'role_name' + """Verifies and loads 'data' as new metadata for delegated target 'role_name'. Raises if verification fails """ From e26772cc6d5b06e697faf4c611f366411087008d Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 6 May 2021 11:33:34 +0300 Subject: [PATCH 57/86] Remove unnecessary directory check at startup * Loading root.json will fail just as descriptively * As long as Bundle doesn't implement bootstrapping the local repo, there's also no need to create missing directories Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 1f38250191..6093efdf72 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -134,10 +134,6 @@ def __init__(self, repository_path: str): self._bundle = {} # type: Dict[str: Metadata] self.reference_time = None - if not os.path.exists(self._path): - # TODO try to create dir instead? - raise exceptions.RepositoryError("Repository does not exist") - # Load and validate the local root metadata # Valid root metadata is required logger.debug("Loading local root") From 800b0882123bb3e15d2ea4ca7d125902b46b321d Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 6 May 2021 14:46:34 +0300 Subject: [PATCH 58/86] MetadataBundle: Fix loads of linting issues Lots of fixes, mostly obvious ones. The trickier ones and pylint disables have comments added to explain them. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 95 +++++++++++++++++----------- 1 file changed, 59 insertions(+), 36 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 6093efdf72..a5fdb33836 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -61,17 +61,14 @@ TODO: - * exceptions are all over the place: the idea is that client could just handle a - generic RepositoryError that covers every issue that server provided metadata - could inflict (other errors would be user errors), but this is not yet the case + * exceptions are all over the place: the idea is that client could just handle + a generic RepositoryError that covers every issue that server provided + metadata could inflict (other errors would be user errors), but this is not + yet the case * usefulness of root_update_finished() can be debated: it could be done in the beginning of load_timestamp()... - * there are some divergences from spec: - * 5.3.11: timestamp and snapshot are not deleted right away (only on next load): - the load functions will refuse to load the files when they are not signed by - current root keys. Deleting at the specified point is possible but means additional - code with some quirks.. - * in general local metadata files are not deleted (they just won't succesfully load) + * there are some divergences from spec: in general local metadata files are + not deleted (they just won't succesfully load) * a bit of repetition * No tests! * Naming maybe not final? @@ -80,10 +77,10 @@ (not sure yet how: maybe a spec_logger that logs specification events?) 
""" -from collections import abc -from datetime import datetime import logging import os +from collections import abc +from datetime import datetime from typing import Dict, Iterator, Optional from securesystemslib import hash as sslib_hash @@ -93,11 +90,21 @@ from tuf.api.metadata import Metadata from tuf.api.serialization import SerializationError +# TODO: Either enaable old-style logging in pylintc (issue #1334) +# or change this file to use f-strings for logging +# pylint: disable=logging-too-many-args + +# TODO: signed._type really does not work issue #1375: +# pylint: disable=protected-access + logger = logging.getLogger(__name__) # This is a placeholder until ... # TODO issue 1306: implement this in Metadata API -def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metadata) -> bool: +def verify_with_threshold( + delegator: Metadata, role_name: str, unverified: Metadata +) -> bool: + """Verify 'unverified' with keys and treshold defined in delegator""" if delegator.signed._type == "root": keys = delegator.signed.keys role = delegator.signed.roles.get(role_name) @@ -121,13 +128,20 @@ def verify_with_threshold(delegator: Metadata, role_name: str, unverified: Metad try: if unverified.verify(key): unique_keys.add(key["keyval"]["public"]) - except: # TODO specify the Exceptions - pass + except Exception as e: # pylint: disable=broad-except + # TODO specify the Exceptions (see issue #1351) + logger.info("verify failed: %s", e) return len(unique_keys) >= role["threshold"] class MetadataBundle(abc.Mapping): + """Internal class to keep track of valid metadata in Updater + + MetadataBundle ensures that metadata is valid. It provides easy ways to + update the metadata with the caller making decisions on what is updated. 
+ """ + def __init__(self, repository_path: str): """Initialize by loading root metadata from disk""" self._path = repository_path @@ -141,7 +155,9 @@ def __init__(self, repository_path: str): with open(os.path.join(self._path, "root.json"), "rb") as f: self._load_intermediate_root(f.read()) except (OSError, exceptions.RepositoryError) as e: - raise exceptions.RepositoryError("Failed to load local root metadata") + raise exceptions.RepositoryError( + "Failed to load local root metadata" + ) from e # Implement Mapping def __getitem__(self, key: str) -> Metadata: @@ -242,7 +258,9 @@ def update_targets(self, data: bytes): """Update targets metadata with data from remote repository.""" self.update_delegated_targets(data, "targets", "root") - def load_local_delegated_targets(self, role_name: str, delegator_name: str) -> bool: + def load_local_delegated_targets( + self, role_name: str, delegator_name: str + ) -> bool: """Load cached metadata for 'role_name' from local storage. Metadata for 'delegator_name' must be loaded already. @@ -256,13 +274,17 @@ def load_local_delegated_targets(self, role_name: str, delegator_name: str) -> b try: with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: - self._load_delegated_targets(f.read(), role_name, delegator_name) + self._load_delegated_targets( + f.read(), role_name, delegator_name + ) return True except (OSError, exceptions.RepositoryError) as e: logger.debug("Failed to load local %s: %s", role_name, e) return False - def update_delegated_targets(self, data: bytes, role_name: str, delegator_name: str = None): + def update_delegated_targets( + self, data: bytes, role_name: str, delegator_name: str = None + ): """Update 'rolename' metadata with data from remote repository. 
Metadata for 'delegator_name' must be loaded already.""" @@ -296,7 +318,6 @@ def _load_intermediate_root(self, data: bytes): ) if new_root.signed.version != self.root.signed.version + 1: - # TODO not a "Replayed Metadata attack": the version is just not what we expected raise exceptions.ReplayedMetadataError( "root", new_root.signed.version, self.root.signed.version ) @@ -358,7 +379,8 @@ def _load_timestamp(self, data: bytes): self._bundle["timestamp"] = new_timestamp logger.debug("Loaded timestamp") - def _load_snapshot(self, data: bytes): + # TODO: remove pylint disable once the hash verification is in metadata.py + def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches """Verifies and loads 'data' as new snapshot metadata.""" if self.timestamp is None: @@ -370,12 +392,12 @@ def _load_snapshot(self, data: bytes): # Verify against the hashes in timestamp, if any hashes = meta.get("hashes") or {} - for algo, _hash in hashes.items(): + for algo, stored_hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) observed_hash = digest_object.hexdigest() - if observed_hash != _hash: - raise exceptions.BadHashError(_hash, observed_hash) + if observed_hash != stored_hash: + raise exceptions.BadHashError(stored_hash, observed_hash) try: new_snapshot = Metadata.from_bytes(data) @@ -425,8 +447,10 @@ def _load_snapshot(self, data: bytes): self._bundle["snapshot"] = new_snapshot logger.debug("Loaded snapshot") - def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: str): - """Verifies and loads 'data' as new metadata for delegated target 'role_name'. + def _load_delegated_targets( + self, data: bytes, role_name: str, delegator_name: str + ): + """Verifies and loads 'data' as new metadata for target 'role_name'. 
Raises if verification fails """ @@ -434,10 +458,8 @@ def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s raise ValueError("Cannot load targets before snapshot") delegator = self.get(delegator_name) - if delegator == None: - raise exceptions.ValueError( - "Cannot load delegated target before delegator" - ) + if delegator is None: + raise ValueError("Cannot load targets before delegator") # Verify against the hashes in snapshot, if any meta = self.snapshot.signed.meta.get(f"{role_name}.json") @@ -447,12 +469,12 @@ def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s ) hashes = meta.get("hashes") or {} - for algo, _hash in hashes.items(): + for algo, stored_hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) observed_hash = digest_object.hexdigest() - if observed_hash != _hash: - raise exceptions.BadHashError(_hash, observed_hash) + if observed_hash != stored_hash: + raise exceptions.BadHashError(stored_hash, observed_hash) try: new_delegate = Metadata.from_bytes(data) @@ -466,17 +488,18 @@ def _load_delegated_targets(self, data: bytes, role_name: str, delegator_name: s if not verify_with_threshold(delegator, role_name, new_delegate): raise exceptions.UnsignedMetadataError( - f"New {role_name} is not signed by {delegator_name}" + f"New {role_name} is not signed by {delegator_name}", + new_delegate, ) if new_delegate.signed.version != meta["version"]: raise exceptions.BadVersionNumberError( - f"Expected {role_name} version" - f"{meta['version']}, got {new_delegate.signed.version}" + f"Expected {role_name} version" + f"{meta['version']}, got {new_delegate.signed.version}" ) if new_delegate.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") self._bundle[role_name] = new_delegate - logger.debug(f"Loaded {role_name} delegated by {delegator_name}") + logger.debug("Loaded %s delegated by %s", role_name, delegator_name) 
From b6817886ccd545e983bfef0a988846e919419cab Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Tue, 11 May 2021 15:09:14 +0300 Subject: [PATCH 59/86] Improve documentation Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index a5fdb33836..fc5b2272d2 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -4,10 +4,11 @@ """TUF client bundle-of-metadata MetadataBundle keeps track of current valid set of metadata for the client, -and handles almost every step of the "Detailed client workflow" in the TUF -specification (the remaining steps are download related). The bundle takes -care of persisting valid metadata on disk, loading local metadata from disk -and deleting invalid local metadata. +and handles almost every step of the "Detailed client workflow" ( +https://theupdateframework.github.io/specification/latest#detailed-client-workflow) +in the TUF specification (the remaining steps are download related). The +bundle takes care of persisting valid metadata on disk and loading local +metadata from disk. 
Loaded metadata can be accessed via the index access with rolename as key or, in the case of top-level metadata using the helper properties like @@ -16,9 +17,12 @@ The rules for top-level metadata are * Metadata is loadable only if metadata it depends on is loaded * Metadata is immutable if any metadata depending on it has been loaded - * Caller must load/update these in order: + * Metadata must be loaded/updated in order: root -> timestamp -> snapshot -> targets -> (other delegated targets) - * Caller should try loading local file before updating metadata from remote + * For each metadata either local load or the remote update must succeed + * Caller should try loading local version before updating metadata from remote + (the exception is root where local data is loaded at MetadataBundle + initialization: the initialization fails if local data cannot be loaded) Exceptions are raised if metadata fails to load in any way. The exception to this is local loads -- only local root metadata needs to be valid: @@ -104,7 +108,7 @@ def verify_with_threshold( delegator: Metadata, role_name: str, unverified: Metadata ) -> bool: - """Verify 'unverified' with keys and treshold defined in delegator""" + """Verify 'unverified' with keys and threshold defined in delegator""" if delegator.signed._type == "root": keys = delegator.signed.keys role = delegator.signed.roles.get(role_name) From 66fa37b259ba91a4913ce24497216fd6d40dba6a Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 11:36:32 +0300 Subject: [PATCH 60/86] MetadataBundle: Update to API changes Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index fc5b2272d2..1f7b5d2236 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -91,7 +91,7 @@ from securesystemslib import keys as 
sslib_keys from tuf import exceptions -from tuf.api.metadata import Metadata +from tuf.api.metadata import Metadata, Root, Targets from tuf.api.serialization import SerializationError # TODO: Either enaable old-style logging in pylintc (issue #1334) @@ -109,14 +109,17 @@ def verify_with_threshold( delegator: Metadata, role_name: str, unverified: Metadata ) -> bool: """Verify 'unverified' with keys and threshold defined in delegator""" - if delegator.signed._type == "root": + role = None + keys = {} + if isinstance(delegator.signed, Root): keys = delegator.signed.keys role = delegator.signed.roles.get(role_name) - elif delegator.signed._type == "targets": - keys = delegator.signed.delegations["keys"] - # role names are unique: first match is enough - roles = delegator.signed.delegations["roles"] - role = next((role for role in roles if role["name"] == role_name), None) + elif isinstance(delegator.signed, Targets): + if delegator.signed.delegations: + keys = delegator.signed.delegations.keys + # role names are unique: first match is enough + roles = delegator.signed.delegations.roles + role = next((r for r in roles if r.name == role_name), None) else: raise ValueError("Call is valid only on delegator metadata") @@ -125,9 +128,9 @@ def verify_with_threshold( # verify that delegate is signed by correct threshold of unique keys unique_keys = set() - for keyid in role["keyids"]: - key_metadata = keys[keyid] - key, dummy = sslib_keys.format_metadata_to_key(key_metadata) + for keyid in role.keyids: + key_dict = keys[keyid].to_dict() + key, dummy = sslib_keys.format_metadata_to_key(key_dict) try: if unverified.verify(key): @@ -136,7 +139,7 @@ def verify_with_threshold( # TODO specify the Exceptions (see issue #1351) logger.info("verify failed: %s", e) - return len(unique_keys) >= role["threshold"] + return len(unique_keys) >= role.threshold class MetadataBundle(abc.Mapping): From 0bbfe038cfb5286f1cb0fcbd19cc7c6afd744504 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: 
Fri, 14 May 2021 12:32:45 +0300 Subject: [PATCH 61/86] tests: Add minimal test case for Bundle Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_metadata_bundle.py diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py new file mode 100644 index 0000000000..7d12e0cc7f --- /dev/null +++ b/tests/test_metadata_bundle.py @@ -0,0 +1,29 @@ +import logging +import os +import sys +import unittest + +from tuf.api import metadata +from tuf.client_rework.metadata_bundle import MetadataBundle + +from tests import utils + +logger = logging.getLogger(__name__) + +class TestMetadataBundle(unittest.TestCase): + def test_local_load(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + + bundle = MetadataBundle(repo_dir) + bundle.root_update_finished() + + self.assertTrue(bundle.load_local_timestamp()) + self.assertTrue(bundle.load_local_snapshot()) + self.assertTrue(bundle.load_local_targets()) + self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) + self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + + +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() From 112b333bba2c420cddb53e4e0b7bc31d00173d83 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 14:02:29 +0300 Subject: [PATCH 62/86] Metadata API: Fix DelegatedRole serialization issue A DelegatedRole with paths=[] fails to serialize correctly (paths is not included in the output json). Fix the issue, modify tests to notice a regression. 
Fixes #1389 Signed-off-by: Jussi Kukkonen --- tests/test_api.py | 18 ++++++++++-------- tuf/api/metadata.py | 6 +++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 6e591a9eeb..a3b2381e13 100755 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -450,21 +450,23 @@ def test_delegated_role_class(self): with self.assertRaises(ValueError): DelegatedRole.from_dict(role.copy()) - # Test creating DelegatedRole only with "path_hash_prefixes" + # Test creating DelegatedRole only with "path_hash_prefixes" (an empty one) del role["paths"] - DelegatedRole.from_dict(role.copy()) - role["paths"] = "foo" + role["path_hash_prefixes"] = [] + role_obj = DelegatedRole.from_dict(role.copy()) + self.assertEqual(role_obj.to_dict(), role) - # Test creating DelegatedRole only with "paths" + # Test creating DelegatedRole only with "paths" (now an empty one) del role["path_hash_prefixes"] - DelegatedRole.from_dict(role.copy()) - role["path_hash_prefixes"] = "foo" + role["paths"] = [] + role_obj = DelegatedRole.from_dict(role.copy()) + self.assertEqual(role_obj.to_dict(), role) # Test creating DelegatedRole without "paths" and # "path_hash_prefixes" set del role["paths"] - del role["path_hash_prefixes"] - DelegatedRole.from_dict(role) + role_obj = DelegatedRole.from_dict(role.copy()) + self.assertEqual(role_obj.to_dict(), role) def test_delegation_class(self): diff --git a/tuf/api/metadata.py b/tuf/api/metadata.py index 361630ef25..6cb654b937 100644 --- a/tuf/api/metadata.py +++ b/tuf/api/metadata.py @@ -770,7 +770,7 @@ def __init__( super().__init__(keyids, threshold, unrecognized_fields) self.name = name self.terminating = terminating - if paths and path_hash_prefixes: + if paths is not None and path_hash_prefixes is not None: raise ValueError( "Only one of the attributes 'paths' and" "'path_hash_prefixes' can be set!" 
@@ -806,9 +806,9 @@ def to_dict(self) -> Dict[str, Any]: "terminating": self.terminating, **base_role_dict, } - if self.paths: + if self.paths is not None: res_dict["paths"] = self.paths - elif self.path_hash_prefixes: + elif self.path_hash_prefixes is not None: res_dict["path_hash_prefixes"] = self.path_hash_prefixes return res_dict From f8b714d1675a857ef72d545f5d693edb62e7278d Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 14:26:26 +0300 Subject: [PATCH 63/86] Metadata API: Don't do equality comparisons on containers Use either "if X is not None:" or a try-except instead of a "if X:". I believe Targets.from_dict() was not really broken with previous code but it looks suspicious and did fail the added test with a strange exception: I expect the from_dict() methods to mainly fail with KeyErrors, ValueErrors or AttributeErrors if file format structure is incorrect. Signed-off-by: Jussi Kukkonen --- tests/test_api.py | 15 +++++++++++++++ tuf/api/metadata.py | 11 +++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index a3b2381e13..3f95459e93 100755 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -496,6 +496,21 @@ def test_delegation_class(self): delegations = Delegations.from_dict(copy.deepcopy(delegations_dict)) self.assertEqual(delegations_dict, delegations.to_dict()) + # empty keys and roles + delegations_dict = {"keys":{}, "roles":[]} + delegations = Delegations.from_dict(delegations_dict.copy()) + self.assertEqual(delegations_dict, delegations.to_dict()) + + # Test some basic missing or broken input + invalid_delegations_dicts = [ + {}, + {"keys":None, "roles":None}, + {"keys":{"foo":0}, "roles":[]}, + {"keys":{}, "roles":["foo"]}, + ] + for d in invalid_delegations_dicts: + with self.assertRaises((KeyError, AttributeError)): + Delegations.from_dict(d) def test_metadata_targets(self): targets_path = os.path.join( diff --git a/tuf/api/metadata.py b/tuf/api/metadata.py index 
6cb654b937..2aeb80f6cd 100644 --- a/tuf/api/metadata.py +++ b/tuf/api/metadata.py @@ -911,9 +911,12 @@ def from_dict(cls, targets_dict: Dict[str, Any]) -> "Targets": """Creates Targets object from its dict representation.""" common_args = cls._common_fields_from_dict(targets_dict) targets = targets_dict.pop("targets") - delegations = targets_dict.pop("delegations", None) - if delegations: - delegations = Delegations.from_dict(delegations) + try: + delegations_dict = targets_dict.pop("delegations") + except KeyError: + delegations = None + else: + delegations = Delegations.from_dict(delegations_dict) # All fields left in the targets_dict are unrecognized. return cls(*common_args, targets, delegations, targets_dict) @@ -921,7 +924,7 @@ def to_dict(self) -> Dict[str, Any]: """Returns the dict representation of self.""" targets_dict = self._common_fields_to_dict() targets_dict["targets"] = self.targets - if self.delegations: + if self.delegations is not None: targets_dict["delegations"] = self.delegations.to_dict() return targets_dict From 2d155faae69d845da929ad1637a67fa6ba7438c0 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 16:25:01 +0300 Subject: [PATCH 64/86] MetadataBundle: Change ValueErrors to RuntimeErrors As the metadata type is no longer an argument, these are not ValueErrors. 
Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 20 ++++++++++++++++++++ tuf/client_rework/metadata_bundle.py | 16 ++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index 7d12e0cc7f..be30e6ba83 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -14,12 +14,32 @@ class TestMetadataBundle(unittest.TestCase): def test_local_load(self): repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + # test loading all local metadata succesfully bundle = MetadataBundle(repo_dir) bundle.root_update_finished() + self.assertTrue(bundle.load_local_timestamp()) + self.assertTrue(bundle.load_local_snapshot()) + self.assertTrue(bundle.load_local_targets()) + self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) + self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + + # Make sure loading metadata without its "dependencies" fails + bundle = MetadataBundle(repo_dir) + with self.assertRaises(RuntimeError): + bundle.load_local_timestamp() + bundle.root_update_finished() + with self.assertRaises(RuntimeError): + bundle.load_local_snapshot() self.assertTrue(bundle.load_local_timestamp()) + with self.assertRaises(RuntimeError): + bundle.load_local_targets() self.assertTrue(bundle.load_local_snapshot()) + with self.assertRaises(RuntimeError): + bundle.load_local_delegated_targets('role1','targets') self.assertTrue(bundle.load_local_targets()) + with self.assertRaises(RuntimeError): + bundle.load_local_delegated_targets('role2','role1') self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 1f7b5d2236..3ea73809aa 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -204,7 
+204,7 @@ def update_root(self, data: bytes): def root_update_finished(self): """Mark root metadata as final.""" if self.reference_time is not None: - raise ValueError("Root update is already finished") + raise RuntimeError("Root update is already finished") # Store our reference "now", verify root expiry self.reference_time = datetime.utcnow() @@ -306,7 +306,7 @@ def _load_intermediate_root(self, data: bytes): Note that an expired intermediate root is considered valid: expiry is only checked for the final root in root_update_finished().""" if self.reference_time is not None: - raise ValueError("Cannot update root after root update is finished") + raise RuntimeError("Cannot update root after root update is finished") try: new_root = Metadata.from_bytes(data) @@ -341,9 +341,9 @@ def _load_timestamp(self, data: bytes): """Verifies and loads 'data' as new timestamp metadata.""" if self.reference_time is None: # root_update_finished() not called - raise ValueError("Cannot update timestamp before root") + raise RuntimeError("Cannot update timestamp before root") if self.snapshot is not None: - raise ValueError("Cannot update timestamp after snapshot") + raise RuntimeError("Cannot update timestamp after snapshot") try: new_timestamp = Metadata.from_bytes(data) @@ -391,9 +391,9 @@ def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches """Verifies and loads 'data' as new snapshot metadata.""" if self.timestamp is None: - raise ValueError("Cannot update snapshot before timestamp") + raise RuntimeError("Cannot update snapshot before timestamp") if self.targets is not None: - raise ValueError("Cannot update snapshot after targets") + raise RuntimeError("Cannot update snapshot after targets") meta = self.timestamp.signed.meta["snapshot.json"] @@ -462,11 +462,11 @@ def _load_delegated_targets( Raises if verification fails """ if self.snapshot is None: - raise ValueError("Cannot load targets before snapshot") + raise RuntimeError("Cannot load targets before 
snapshot") delegator = self.get(delegator_name) if delegator is None: - raise ValueError("Cannot load targets before delegator") + raise RuntimeError("Cannot load targets before delegator") # Verify against the hashes in snapshot, if any meta = self.snapshot.signed.meta.get(f"{role_name}.json") From eb648d19bc340ebbe3a60d57eae5ddcff76f6b26 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 17:32:02 +0300 Subject: [PATCH 65/86] MetadataBundle: Save original files on disk Don't use the serialized format as that won't match any hashes in "meta". Add basic tests for updating metadata. Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 59 ++++++++++++++++++++++++++-- tuf/client_rework/metadata_bundle.py | 12 ++++-- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index be30e6ba83..b566a7a27b 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -1,6 +1,8 @@ import logging import os +import shutil import sys +import tempfile import unittest from tuf.api import metadata @@ -11,11 +13,26 @@ logger = logging.getLogger(__name__) class TestMetadataBundle(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd()) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.temporary_directory) + + def setUp(self): + # copy metadata to "local repo" + shutil.copytree( + os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata'), + self.temporary_directory, + dirs_exist_ok=True + ) + def test_local_load(self): - repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') # test loading all local metadata succesfully - bundle = MetadataBundle(repo_dir) + bundle = MetadataBundle(self.temporary_directory) bundle.root_update_finished() self.assertTrue(bundle.load_local_timestamp()) self.assertTrue(bundle.load_local_snapshot()) @@ -24,7 +41,7 @@ def 
test_local_load(self): self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) # Make sure loading metadata without its "dependencies" fails - bundle = MetadataBundle(repo_dir) + bundle = MetadataBundle(self.temporary_directory) with self.assertRaises(RuntimeError): bundle.load_local_timestamp() @@ -43,6 +60,42 @@ def test_local_load(self): self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + def test_update(self): + remote_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + + # remove all but root.json from local repo + os.remove(os.path.join(self.temporary_directory, "timestamp.json")) + os.remove(os.path.join(self.temporary_directory, "snapshot.json")) + os.remove(os.path.join(self.temporary_directory, "targets.json")) + os.remove(os.path.join(self.temporary_directory, "role1.json")) + os.remove(os.path.join(self.temporary_directory, "role2.json")) + + # test updating metadata succesfully + bundle = MetadataBundle(self.temporary_directory) + bundle.root_update_finished() + + with open(os.path.join(remote_dir, "timestamp.json"), "rb") as f: + bundle.update_timestamp(f.read()) + with open(os.path.join(remote_dir, "snapshot.json"), "rb") as f: + bundle.update_snapshot(f.read()) + with open(os.path.join(remote_dir, "targets.json"), "rb") as f: + bundle.update_targets(f.read()) + with open(os.path.join(remote_dir, "role1.json"), "rb") as f: + bundle.update_delegated_targets(f.read(), "role1", "targets") + with open(os.path.join(remote_dir, "role2.json"), "rb") as f: + bundle.update_delegated_targets(f.read(), "role2", "role1") + + # test loading the metadata (that should now be locally available) + bundle = MetadataBundle(self.temporary_directory) + bundle.root_update_finished() + self.assertTrue(bundle.load_local_timestamp()) + self.assertTrue(bundle.load_local_snapshot()) + self.assertTrue(bundle.load_local_targets()) + 
self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) + self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + + # TODO test loading one version, then updating to new versions of each metadata + if __name__ == '__main__': utils.configure_test_logging(sys.argv) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 3ea73809aa..91f0b39de3 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -199,7 +199,8 @@ def update_root(self, data: bytes): logger.debug("Updating root") self._load_intermediate_root(data) - self.root.to_file(os.path.join(self._path, "root.json")) + with open(os.path.join(self._path, "root.json"), "wb") as f: + f.write(data) def root_update_finished(self): """Mark root metadata as final.""" @@ -232,7 +233,8 @@ def update_timestamp(self, data: bytes): logger.debug("Updating timestamp") self._load_timestamp(data) - self.timestamp.to_file(os.path.join(self._path, "timestamp.json")) + with open(os.path.join(self._path, "timestamp.json"), "wb") as f: + f.write(data) def load_local_snapshot(self) -> bool: """Load cached snapshot metadata from local storage. @@ -253,7 +255,8 @@ def update_snapshot(self, data: bytes): logger.debug("Updating snapshot") self._load_snapshot(data) - self.snapshot.to_file(os.path.join(self._path, "snapshot.json")) + with open(os.path.join(self._path, "snapshot.json"), "wb") as f: + f.write(data) def load_local_targets(self) -> bool: """Load cached targets metadata from local storage. @@ -298,7 +301,8 @@ def update_delegated_targets( logger.debug("Updating %s", role_name) self._load_delegated_targets(data, role_name, delegator_name) - self[role_name].to_file(os.path.join(self._path, f"{role_name}.json")) + with open(os.path.join(self._path, f"{role_name}.json"), "wb") as f: + f.write(data) def _load_intermediate_root(self, data: bytes): """Verifies and loads 'data' as new root metadata. 
From 3b30d085ee896ccf20cd31acf34fef87d163ac78 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 17:52:51 +0300 Subject: [PATCH 66/86] MetadataBundle: Store reference time earlier Spec says reference time should be the beginning of the process: do that. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 91f0b39de3..67ad2df450 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -153,7 +153,8 @@ def __init__(self, repository_path: str): """Initialize by loading root metadata from disk""" self._path = repository_path self._bundle = {} # type: Dict[str: Metadata] - self.reference_time = None + self.reference_time = datetime.utcnow() + self._root_update_finished = False # Load and validate the local root metadata # Valid root metadata is required @@ -204,14 +205,13 @@ def update_root(self, data: bytes): def root_update_finished(self): """Mark root metadata as final.""" - if self.reference_time is not None: + if self._root_update_finished: raise RuntimeError("Root update is already finished") - # Store our reference "now", verify root expiry - self.reference_time = datetime.utcnow() if self.root.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError("New root.json is expired") + self._root_update_finished = True logger.debug("Verified final root.json") def load_local_timestamp(self) -> bool: @@ -309,7 +309,7 @@ def _load_intermediate_root(self, data: bytes): Note that an expired intermediate root is considered valid: expiry is only checked for the final root in root_update_finished().""" - if self.reference_time is not None: + if self._root_update_finished: raise RuntimeError("Cannot update root after root update is finished") try: @@ -343,7 +343,7 @@ def _load_intermediate_root(self, data: bytes): def 
_load_timestamp(self, data: bytes): """Verifies and loads 'data' as new timestamp metadata.""" - if self.reference_time is None: + if not self._root_update_finished: # root_update_finished() not called raise RuntimeError("Cannot update timestamp before root") if self.snapshot is not None: From 8d0245ab30dfc3d2cdd1c9109391e70853de1131 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 17:58:53 +0300 Subject: [PATCH 67/86] MetadataBundle: Use type, not _type Signed now has "type" attribute, use that. Also remove another pylint disable that is no longer needed (logging is now old style). Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 67ad2df450..12ef50e4d3 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -94,13 +94,6 @@ from tuf.api.metadata import Metadata, Root, Targets from tuf.api.serialization import SerializationError -# TODO: Either enaable old-style logging in pylintc (issue #1334) -# or change this file to use f-strings for logging -# pylint: disable=logging-too-many-args - -# TODO: signed._type really does not work issue #1375: -# pylint: disable=protected-access - logger = logging.getLogger(__name__) # This is a placeholder until ... 
@@ -317,9 +310,9 @@ def _load_intermediate_root(self, data: bytes): except SerializationError as e: raise exceptions.RepositoryError("Failed to load root") from e - if new_root.signed._type != "root": + if new_root.signed.type != "root": raise exceptions.RepositoryError( - f"Expected 'root', got '{new_root.signed._type}'" + f"Expected 'root', got '{new_root.signed.type}'" ) if self.root is not None: @@ -354,9 +347,9 @@ def _load_timestamp(self, data: bytes): except SerializationError as e: raise exceptions.RepositoryError("Failed to load timestamp") from e - if new_timestamp.signed._type != "timestamp": + if new_timestamp.signed.type != "timestamp": raise exceptions.RepositoryError( - f"Expected 'timestamp', got '{new_timestamp.signed._type}'" + f"Expected 'timestamp', got '{new_timestamp.signed.type}'" ) if not verify_with_threshold(self.root, "timestamp", new_timestamp): @@ -415,9 +408,9 @@ def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches except SerializationError as e: raise exceptions.RepositoryError("Failed to load snapshot") from e - if new_snapshot.signed._type != "snapshot": + if new_snapshot.signed.type != "snapshot": raise exceptions.RepositoryError( - f"Expected 'snapshot', got '{new_snapshot.signed._type}'" + f"Expected 'snapshot', got '{new_snapshot.signed.type}'" ) if not verify_with_threshold(self.root, "snapshot", new_snapshot): @@ -492,9 +485,9 @@ def _load_delegated_targets( except SerializationError as e: raise exceptions.RepositoryError("Failed to load snapshot") from e - if new_delegate.signed._type != "targets": + if new_delegate.signed.type != "targets": raise exceptions.RepositoryError( - f"Expected 'targets', got '{new_delegate.signed._type}'" + f"Expected 'targets', got '{new_delegate.signed.type}'" ) if not verify_with_threshold(delegator, role_name, new_delegate): From 876fda1bb26423518a2e1399403cc5c87ff27505 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 18:02:53 +0300 Subject: [PATCH 
68/86] MetadataBundle: Add comments about the process Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 12ef50e4d3..54d5e02a42 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -204,6 +204,9 @@ def root_update_finished(self): if self.root.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError("New root.json is expired") + # We skip specification step 5.3.11: deleting timestamp and snapshot + # with rotated keys is not needed as they will be invalid, are not + # loaded and cannot be loaded self._root_update_finished = True logger.debug("Verified final root.json") @@ -316,6 +319,7 @@ def _load_intermediate_root(self, data: bytes): ) if self.root is not None: + # We are not loading initial trusted root: verify the new one if not verify_with_threshold(self.root, "root", new_root): raise exceptions.UnsignedMetadataError( "New root is not signed by root", new_root.signed From 112f3b6a0334be4a68f4d5e72067b832285fba09 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 20:35:12 +0300 Subject: [PATCH 69/86] MetadataBundle: Handle Deserialization errors because we are deserializing, not serializing. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 54d5e02a42..8e0e9e1b94 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -92,7 +92,7 @@ from tuf import exceptions from tuf.api.metadata import Metadata, Root, Targets -from tuf.api.serialization import SerializationError +from tuf.api.serialization import DeserializationError logger = logging.getLogger(__name__) @@ -310,7 +310,7 @@ def _load_intermediate_root(self, data: bytes): try: new_root = Metadata.from_bytes(data) - except SerializationError as e: + except DeserializationError as e: raise exceptions.RepositoryError("Failed to load root") from e if new_root.signed.type != "root": @@ -348,7 +348,7 @@ def _load_timestamp(self, data: bytes): try: new_timestamp = Metadata.from_bytes(data) - except SerializationError as e: + except DeserializationError as e: raise exceptions.RepositoryError("Failed to load timestamp") from e if new_timestamp.signed.type != "timestamp": @@ -409,7 +409,7 @@ def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches try: new_snapshot = Metadata.from_bytes(data) - except SerializationError as e: + except DeserializationError as e: raise exceptions.RepositoryError("Failed to load snapshot") from e if new_snapshot.signed.type != "snapshot": @@ -486,7 +486,7 @@ def _load_delegated_targets( try: new_delegate = Metadata.from_bytes(data) - except SerializationError as e: + except DeserializationError as e: raise exceptions.RepositoryError("Failed to load snapshot") from e if new_delegate.signed.type != "targets": From b86d1f733fe09930c5f810998aeb210b6b385a2e Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 20:36:51 +0300 Subject: [PATCH 70/86] MetadataBundle: Raise instead of returning bool The bundle should now raise * derivatives of 
RepositoryError on failures that are likely a result of server error or a malicious server * RuntimeErrors if calls were made when they are not possible * ValueErrors if arguments are invalid The last two are caller errors and are avoidable. Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 140 +++++++++++++++++++++------ tuf/client_rework/metadata_bundle.py | 58 +++++------ 2 files changed, 136 insertions(+), 62 deletions(-) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index b566a7a27b..27b35daa2f 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -1,3 +1,4 @@ +import json import logging import os import shutil @@ -5,7 +6,8 @@ import tempfile import unittest -from tuf.api import metadata +from tuf import exceptions +from tuf.api.metadata import Metadata from tuf.client_rework.metadata_bundle import MetadataBundle from tests import utils @@ -15,65 +17,67 @@ class TestMetadataBundle(unittest.TestCase): @classmethod def setUpClass(cls): - cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd()) + cls.temp_dir = tempfile.mkdtemp(dir=os.getcwd()) @classmethod def tearDownClass(cls): - shutil.rmtree(cls.temporary_directory) + shutil.rmtree(cls.temp_dir) def setUp(self): # copy metadata to "local repo" shutil.copytree( os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata'), - self.temporary_directory, + self.temp_dir, dirs_exist_ok=True ) def test_local_load(self): # test loading all local metadata succesfully - bundle = MetadataBundle(self.temporary_directory) + bundle = MetadataBundle(self.temp_dir) bundle.root_update_finished() - self.assertTrue(bundle.load_local_timestamp()) - self.assertTrue(bundle.load_local_snapshot()) - self.assertTrue(bundle.load_local_targets()) - self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) - self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + bundle.load_local_timestamp() + bundle.load_local_snapshot() + 
bundle.load_local_targets() + bundle.load_local_delegated_targets('role1','targets') + bundle.load_local_delegated_targets('role2','role1') # Make sure loading metadata without its "dependencies" fails - bundle = MetadataBundle(self.temporary_directory) + bundle = MetadataBundle(self.temp_dir) with self.assertRaises(RuntimeError): bundle.load_local_timestamp() bundle.root_update_finished() with self.assertRaises(RuntimeError): bundle.load_local_snapshot() - self.assertTrue(bundle.load_local_timestamp()) + bundle.load_local_timestamp() with self.assertRaises(RuntimeError): bundle.load_local_targets() - self.assertTrue(bundle.load_local_snapshot()) + bundle.load_local_snapshot() with self.assertRaises(RuntimeError): bundle.load_local_delegated_targets('role1','targets') - self.assertTrue(bundle.load_local_targets()) + bundle.load_local_targets() with self.assertRaises(RuntimeError): bundle.load_local_delegated_targets('role2','role1') - self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) - self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + bundle.load_local_delegated_targets('role1','targets') + bundle.load_local_delegated_targets('role2','role1') def test_update(self): remote_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') # remove all but root.json from local repo - os.remove(os.path.join(self.temporary_directory, "timestamp.json")) - os.remove(os.path.join(self.temporary_directory, "snapshot.json")) - os.remove(os.path.join(self.temporary_directory, "targets.json")) - os.remove(os.path.join(self.temporary_directory, "role1.json")) - os.remove(os.path.join(self.temporary_directory, "role2.json")) - - # test updating metadata succesfully - bundle = MetadataBundle(self.temporary_directory) + os.remove(os.path.join(self.temp_dir, "timestamp.json")) + os.remove(os.path.join(self.temp_dir, "snapshot.json")) + os.remove(os.path.join(self.temp_dir, "targets.json")) + 
os.remove(os.path.join(self.temp_dir, "role1.json")) + os.remove(os.path.join(self.temp_dir, "role2.json")) + + bundle = MetadataBundle(self.temp_dir) bundle.root_update_finished() + # test local load failure, then updating metadata succesfully + with self.assertRaises(exceptions.RepositoryError): + bundle.load_local_timestamp() with open(os.path.join(remote_dir, "timestamp.json"), "rb") as f: bundle.update_timestamp(f.read()) with open(os.path.join(remote_dir, "snapshot.json"), "rb") as f: @@ -86,16 +90,92 @@ def test_update(self): bundle.update_delegated_targets(f.read(), "role2", "role1") # test loading the metadata (that should now be locally available) - bundle = MetadataBundle(self.temporary_directory) + bundle = MetadataBundle(self.temp_dir) bundle.root_update_finished() - self.assertTrue(bundle.load_local_timestamp()) - self.assertTrue(bundle.load_local_snapshot()) - self.assertTrue(bundle.load_local_targets()) - self.assertTrue(bundle.load_local_delegated_targets('role1','targets')) - self.assertTrue(bundle.load_local_delegated_targets('role2','role1')) + bundle.load_local_timestamp() + bundle.load_local_snapshot() + bundle.load_local_targets() + bundle.load_local_delegated_targets('role1','targets') + bundle.load_local_delegated_targets('role2','role1') # TODO test loading one version, then updating to new versions of each metadata + def test_local_load_with_invalid_data(self): + # Test root and one of the top-level metadata files + + with tempfile.TemporaryDirectory() as tempdir: + # Missing root.json + with self.assertRaises(exceptions.RepositoryError): + MetadataBundle(tempdir) + + # root.json not a json file at all + with open(os.path.join(tempdir, "root.json"), "w") as f: + f.write("") + with self.assertRaises(exceptions.RepositoryError): + MetadataBundle(tempdir) + + # root.json does not validate + md = Metadata.from_file(os.path.join(self.temp_dir, "root.json")) + md.signed.version += 1 + md.to_file(os.path.join(tempdir, "root.json")) + with 
self.assertRaises(exceptions.RepositoryError): + MetadataBundle(tempdir) + + md.signed.version -= 1 + md.to_file(os.path.join(tempdir, "root.json")) + bundle = MetadataBundle(tempdir) + bundle.root_update_finished() + + # Missing timestamp.json + with self.assertRaises(exceptions.RepositoryError): + bundle.load_local_timestamp() + + # timestamp not a json file at all + with open(os.path.join(tempdir, "timestamp.json"), "w") as f: + f.write("") + with self.assertRaises(exceptions.RepositoryError): + bundle.load_local_timestamp() + + # timestamp does not validate + md = Metadata.from_file(os.path.join(self.temp_dir, "timestamp.json")) + md.signed.version += 1 + md.to_file(os.path.join(tempdir, "timestamp.json")) + with self.assertRaises(exceptions.RepositoryError): + bundle.load_local_timestamp() + + md.signed.version -= 1 + md.to_file(os.path.join(tempdir, "timestamp.json")) + bundle.load_local_timestamp() + + def test_update_with_invalid_data(self): + # Test on of the top level metadata files + + timestamp_md = Metadata.from_file(os.path.join(self.temp_dir, "timestamp.json")) + + # remove all but root.json from local repo + os.remove(os.path.join(self.temp_dir, "timestamp.json")) + os.remove(os.path.join(self.temp_dir, "snapshot.json")) + os.remove(os.path.join(self.temp_dir, "targets.json")) + os.remove(os.path.join(self.temp_dir, "role1.json")) + os.remove(os.path.join(self.temp_dir, "role2.json")) + + bundle = MetadataBundle(self.temp_dir) + bundle.root_update_finished() + + # timestamp not a json file at all + with self.assertRaises(exceptions.RepositoryError): + bundle.update_timestamp(b"") + + # timestamp does not validate + timestamp_md.signed.version += 1 + data = timestamp_md.to_dict() + with self.assertRaises(exceptions.RepositoryError): + bundle.update_timestamp(json.dumps(data).encode()) + + timestamp_md.signed.version -= 1 + data = timestamp_md.to_dict() + bundle.update_timestamp(json.dumps(data).encode()) + if __name__ == '__main__': 
utils.configure_test_logging(sys.argv) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 8e0e9e1b94..2a102d6094 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -210,19 +210,17 @@ def root_update_finished(self): self._root_update_finished = True logger.debug("Verified final root.json") - def load_local_timestamp(self) -> bool: - """Load cached timestamp metadata from local storage. - - Returns True if timestamp was succesfully loaded""" + def load_local_timestamp(self): + """Load cached timestamp metadata from local storage.""" logger.debug("Loading local timestamp") try: with open(os.path.join(self._path, "timestamp.json"), "rb") as f: self._load_timestamp(f.read()) - return True - except (OSError, exceptions.RepositoryError) as e: - logger.debug("Failed to load local timestamp: %s", e) - return False + except OSError as e: + raise exceptions.RepositoryError( + "Failed to load local timestamp" + ) from e def update_timestamp(self, data: bytes): """Update timestamp metadata with data from remote repository.""" @@ -232,19 +230,17 @@ def update_timestamp(self, data: bytes): with open(os.path.join(self._path, "timestamp.json"), "wb") as f: f.write(data) - def load_local_snapshot(self) -> bool: - """Load cached snapshot metadata from local storage. 
- - Returns True if snapshot was succesfully loaded""" + def load_local_snapshot(self): + """Load cached snapshot metadata from local storage.""" logger.debug("Loading local snapshot") try: with open(os.path.join(self._path, "snapshot.json"), "rb") as f: self._load_snapshot(f.read()) - return True - except (OSError, exceptions.RepositoryError) as e: - logger.debug("Failed to load local snapshot: %s", e) - return False + except OSError as e: + raise exceptions.RepositoryError( + "Failed to load local snapshot" + ) from e def update_snapshot(self, data: bytes): """Update snapshot metadata with data from remote repository.""" @@ -254,27 +250,21 @@ def update_snapshot(self, data: bytes): with open(os.path.join(self._path, "snapshot.json"), "wb") as f: f.write(data) - def load_local_targets(self) -> bool: - """Load cached targets metadata from local storage. - - Returns True if targets was succesfully loaded""" - return self.load_local_delegated_targets("targets", "root") + def load_local_targets(self): + """Load cached targets metadata from local storage.""" + self.load_local_delegated_targets("targets", "root") def update_targets(self, data: bytes): """Update targets metadata with data from remote repository.""" self.update_delegated_targets(data, "targets", "root") - def load_local_delegated_targets( - self, role_name: str, delegator_name: str - ) -> bool: + def load_local_delegated_targets(self, role_name: str, delegator_name: str): """Load cached metadata for 'role_name' from local storage. Metadata for 'delegator_name' must be loaded already. 
- - Returns True if metadata was succesfully loaded""" + """ if self.get(role_name): logger.debug("Local %s already loaded", role_name) - return True logger.debug("Loading local %s", role_name) @@ -283,10 +273,10 @@ def load_local_delegated_targets( self._load_delegated_targets( f.read(), role_name, delegator_name ) - return True - except (OSError, exceptions.RepositoryError) as e: - logger.debug("Failed to load local %s: %s", role_name, e) - return False + except OSError as e: + raise exceptions.RepositoryError( + f"Failed to load local {role_name}" + ) from e def update_delegated_targets( self, data: bytes, role_name: str, delegator_name: str = None @@ -306,7 +296,9 @@ def _load_intermediate_root(self, data: bytes): Note that an expired intermediate root is considered valid: expiry is only checked for the final root in root_update_finished().""" if self._root_update_finished: - raise RuntimeError("Cannot update root after root update is finished") + raise RuntimeError( + "Cannot update root after root update is finished" + ) try: new_root = Metadata.from_bytes(data) @@ -405,6 +397,7 @@ def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches digest_object.update(data) observed_hash = digest_object.hexdigest() if observed_hash != stored_hash: + # TODO: Error should derive from RepositoryError raise exceptions.BadHashError(stored_hash, observed_hash) try: @@ -482,6 +475,7 @@ def _load_delegated_targets( digest_object.update(data) observed_hash = digest_object.hexdigest() if observed_hash != stored_hash: + # TODO: Error should derive from RepositoryError raise exceptions.BadHashError(stored_hash, observed_hash) try: From a371258be8d043502ba511a37b3c8af5e9b1a9bb Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 May 2021 20:43:08 +0300 Subject: [PATCH 71/86] MetadataBundle: Use builtin errors when possible There's no value in using custom errors when builtins work. 
Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 2a102d6094..7976a2c536 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -117,7 +117,7 @@ def verify_with_threshold( raise ValueError("Call is valid only on delegator metadata") if role is None: - raise exceptions.UnknownRoleError + raise ValueError(f"Delegated role {role_name} not found") # verify that delegate is signed by correct threshold of unique keys unique_keys = set() From 6b53ac78d07a8e1dbc4c77ad53c812cce3310dd3 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Sun, 16 May 2021 12:16:55 +0300 Subject: [PATCH 72/86] Make BadHashError derive from RepositoryError This is backwards-compatible and means that most (all?) errors resulting from suspicious or broken metadata are now RepositoryErrors. Signed-off-by: Jussi Kukkonen --- tuf/exceptions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tuf/exceptions.py b/tuf/exceptions.py index c5f795e0c6..af82c92a89 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -66,7 +66,11 @@ class UnsupportedAlgorithmError(Error): """Indicate an error while trying to identify a user-specified algorithm.""" -class BadHashError(Error): +class RepositoryError(Error): + """Indicate an error with a repository's state, such as a missing file.""" + + +class BadHashError(RepositoryError): """Indicate an error while checking the value of a hash object.""" def __init__(self, expected_hash, observed_hash): @@ -97,10 +101,6 @@ class UnknownKeyError(Error): """Indicate an error while verifying key-like objects (e.g., keyids).""" -class RepositoryError(Error): - """Indicate an error with a repository's state, such as a missing file.""" - - class BadVersionNumberError(RepositoryError): """Indicate an error for metadata that contains an invalid 
version number.""" From f2cff951a6ebd0834089ad53f64abc44a650d8fd Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Sun, 16 May 2021 12:24:17 +0300 Subject: [PATCH 73/86] MetadataBundle: Don't do any file IO Remove file IO from MetadataBundle: * This makes the bundle API very clear and easy to understand * This means the caller must now read from and persist data to disk but initial prototypes suggest this won't make Updater too complex This change is something we can still back out from if it turns out to be the wrong decision: the file-persisting MetadataBundle has been tested and works fine. Signed-off-by: Jussi Kukkonen --- tests/test_metadata_bundle.py | 206 +++++++++---------------- tuf/client_rework/metadata_bundle.py | 222 +++++++-------------------- 2 files changed, 133 insertions(+), 295 deletions(-) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index 27b35daa2f..e758e6e7bd 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -15,166 +15,108 @@ logger = logging.getLogger(__name__) class TestMetadataBundle(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.temp_dir = tempfile.mkdtemp(dir=os.getcwd()) - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.temp_dir) - - def setUp(self): - # copy metadata to "local repo" - shutil.copytree( - os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata'), - self.temp_dir, - dirs_exist_ok=True - ) - - def test_local_load(self): - - # test loading all local metadata succesfully - bundle = MetadataBundle(self.temp_dir) - bundle.root_update_finished() - bundle.load_local_timestamp() - bundle.load_local_snapshot() - bundle.load_local_targets() - bundle.load_local_delegated_targets('role1','targets') - bundle.load_local_delegated_targets('role2','role1') - - # Make sure loading metadata without its "dependencies" fails - bundle = MetadataBundle(self.temp_dir) - - with self.assertRaises(RuntimeError): - bundle.load_local_timestamp() - 
bundle.root_update_finished() - with self.assertRaises(RuntimeError): - bundle.load_local_snapshot() - bundle.load_local_timestamp() - with self.assertRaises(RuntimeError): - bundle.load_local_targets() - bundle.load_local_snapshot() - with self.assertRaises(RuntimeError): - bundle.load_local_delegated_targets('role1','targets') - bundle.load_local_targets() - with self.assertRaises(RuntimeError): - bundle.load_local_delegated_targets('role2','role1') - bundle.load_local_delegated_targets('role1','targets') - bundle.load_local_delegated_targets('role2','role1') def test_update(self): - remote_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') - - # remove all but root.json from local repo - os.remove(os.path.join(self.temp_dir, "timestamp.json")) - os.remove(os.path.join(self.temp_dir, "snapshot.json")) - os.remove(os.path.join(self.temp_dir, "targets.json")) - os.remove(os.path.join(self.temp_dir, "role1.json")) - os.remove(os.path.join(self.temp_dir, "role2.json")) + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') - bundle = MetadataBundle(self.temp_dir) + with open(os.path.join(repo_dir, "root.json"), "rb") as f: + bundle = MetadataBundle(f.read()) bundle.root_update_finished() - # test local load failure, then updating metadata succesfully - with self.assertRaises(exceptions.RepositoryError): - bundle.load_local_timestamp() - with open(os.path.join(remote_dir, "timestamp.json"), "rb") as f: + with open(os.path.join(repo_dir, "timestamp.json"), "rb") as f: bundle.update_timestamp(f.read()) - with open(os.path.join(remote_dir, "snapshot.json"), "rb") as f: + with open(os.path.join(repo_dir, "snapshot.json"), "rb") as f: bundle.update_snapshot(f.read()) - with open(os.path.join(remote_dir, "targets.json"), "rb") as f: + with open(os.path.join(repo_dir, "targets.json"), "rb") as f: bundle.update_targets(f.read()) - with open(os.path.join(remote_dir, "role1.json"), "rb") as f: + with 
open(os.path.join(repo_dir, "role1.json"), "rb") as f: bundle.update_delegated_targets(f.read(), "role1", "targets") - with open(os.path.join(remote_dir, "role2.json"), "rb") as f: + with open(os.path.join(repo_dir, "role2.json"), "rb") as f: bundle.update_delegated_targets(f.read(), "role2", "role1") - # test loading the metadata (that should now be locally available) - bundle = MetadataBundle(self.temp_dir) - bundle.root_update_finished() - bundle.load_local_timestamp() - bundle.load_local_snapshot() - bundle.load_local_targets() - bundle.load_local_delegated_targets('role1','targets') - bundle.load_local_delegated_targets('role2','role1') - - # TODO test loading one version, then updating to new versions of each metadata + def test_out_of_order_ops(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + data={} + for md in ["root", "timestamp", "snapshot", "targets", "role1"]: + with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: + data[md] = f.read() - def test_local_load_with_invalid_data(self): - # Test root and one of the top-level metadata files + bundle = MetadataBundle(data["root"]) - with tempfile.TemporaryDirectory() as tempdir: - # Missing root.json - with self.assertRaises(exceptions.RepositoryError): - MetadataBundle(tempdir) + # Update timestamp before root is finished + with self.assertRaises(RuntimeError): + bundle.update_timestamp(data["timestamp"]) - # root.json not a json file at all - with open(os.path.join(tempdir, "root.json"), "w") as f: - f.write("") - with self.assertRaises(exceptions.RepositoryError): - MetadataBundle(tempdir) + bundle.root_update_finished() + with self.assertRaises(RuntimeError): + bundle.root_update_finished() - # root.json does not validate - md = Metadata.from_file(os.path.join(self.temp_dir, "root.json")) - md.signed.version += 1 - md.to_file(os.path.join(tempdir, "root.json")) - with self.assertRaises(exceptions.RepositoryError): - MetadataBundle(tempdir) + # Update 
snapshot before timestamp + with self.assertRaises(RuntimeError): + bundle.update_snapshot(data["snapshot"]) - md.signed.version -= 1 - md.to_file(os.path.join(tempdir, "root.json")) - bundle = MetadataBundle(tempdir) - bundle.root_update_finished() + bundle.update_timestamp(data["timestamp"]) - # Missing timestamp.json - with self.assertRaises(exceptions.RepositoryError): - bundle.load_local_timestamp() + # Update targets before snapshot + with self.assertRaises(RuntimeError): + bundle.update_targets(data["targets"]) - # timestamp not a json file at all - with open(os.path.join(tempdir, "timestamp.json"), "w") as f: - f.write("") - with self.assertRaises(exceptions.RepositoryError): - bundle.load_local_timestamp() + bundle.update_snapshot(data["snapshot"]) - # timestamp does not validate - md = Metadata.from_file(os.path.join(self.temp_dir, "timestamp.json")) - md.signed.version += 1 - md.to_file(os.path.join(tempdir, "timestamp.json")) - with self.assertRaises(exceptions.RepositoryError): - bundle.load_local_timestamp() + #update timestamp after snapshot + with self.assertRaises(RuntimeError): + bundle.update_timestamp(data["timestamp"]) - md.signed.version -= 1 - md.to_file(os.path.join(tempdir, "timestamp.json")) - bundle.load_local_timestamp() + # Update delegated targets before targets + with self.assertRaises(RuntimeError): + bundle.update_delegated_targets(data["role1"], "role1", "targets") - def test_update_with_invalid_data(self): - # Test on of the top level metadata files + bundle.update_targets(data["targets"]) + bundle.update_delegated_targets(data["role1"], "role1", "targets") - timestamp_md = Metadata.from_file(os.path.join(self.temp_dir, "timestamp.json")) + def test_update_with_invalid_json(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + data={} + for md in ["root", "timestamp", "snapshot", "targets", "role1"]: + with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: + data[md] = f.read() - # 
remove all but root.json from local repo - os.remove(os.path.join(self.temp_dir, "timestamp.json")) - os.remove(os.path.join(self.temp_dir, "snapshot.json")) - os.remove(os.path.join(self.temp_dir, "targets.json")) - os.remove(os.path.join(self.temp_dir, "role1.json")) - os.remove(os.path.join(self.temp_dir, "role2.json")) + # root.json not a json file at all + with self.assertRaises(exceptions.RepositoryError): + MetadataBundle(b"") + # root.json is invalid + root = Metadata.from_bytes(data["root"]) + root.signed.version += 1 + with self.assertRaises(exceptions.RepositoryError): + MetadataBundle(json.dumps(root.to_dict()).encode()) - bundle = MetadataBundle(self.temp_dir) + bundle = MetadataBundle(data["root"]) bundle.root_update_finished() - # timestamp not a json file at all - with self.assertRaises(exceptions.RepositoryError): - bundle.update_timestamp(b"") + top_level_md = [ + (data["timestamp"], bundle.update_timestamp), + (data["snapshot"], bundle.update_snapshot), + (data["targets"], bundle.update_targets), + ] + for metadata, update_func in top_level_md: + # metadata is not json + with self.assertRaises(exceptions.RepositoryError): + update_func(b"") + # metadata is invalid + md = Metadata.from_bytes(metadata) + md.signed.version += 1 + with self.assertRaises(exceptions.RepositoryError): + update_func(json.dumps(md.to_dict()).encode()) + + # metadata is of wrong type + with self.assertRaises(exceptions.RepositoryError): + update_func(data["root"]) + + update_func(metadata) - # timestamp does not validate - timestamp_md.signed.version += 1 - data = timestamp_md.to_dict() - with self.assertRaises(exceptions.RepositoryError): - bundle.update_timestamp(json.dumps(data).encode()) - timestamp_md.signed.version -= 1 - data = timestamp_md.to_dict() - bundle.update_timestamp(json.dumps(data).encode()) + # TODO test updating over initial metadata (new keys, newer timestamp, etc) + # TODO test the actual specification checks if __name__ == '__main__': diff --git 
a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 7976a2c536..470f750ebc 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -6,83 +6,66 @@ MetadataBundle keeps track of current valid set of metadata for the client, and handles almost every step of the "Detailed client workflow" ( https://theupdateframework.github.io/specification/latest#detailed-client-workflow) -in the TUF specification (the remaining steps are download related). The -bundle takes care of persisting valid metadata on disk and loading local -metadata from disk. +in the TUF specification: the remaining steps are related to filesystem and +network IO which is not handled here. Loaded metadata can be accessed via the index access with rolename as key -or, in the case of top-level metadata using the helper properties like -'MetadataBundle.root' +(bundle["root"]) or, in the case of top-level metadata using the helper +properties (bundle.root). The rules for top-level metadata are * Metadata is loadable only if metadata it depends on is loaded * Metadata is immutable if any metadata depending on it has been loaded * Metadata must be loaded/updated in order: root -> timestamp -> snapshot -> targets -> (other delegated targets) - * For each metadata either local load or the remote update must succeed - * Caller should try loading local version before updating metadata from remote - (the exception is root where local data is loaded at MetadataBundle - initialization: the initialization fails if local data cannot be loaded) -Exceptions are raised if metadata fails to load in any way. The exception -to this is local loads -- only local root metadata needs to be valid: -other local metadata is allowed to be invalid (e.g. no longer signed): -it won't be loaded but there will not be an exception. +Exceptions are raised if metadata fails to load in any way. 
-Example (with hypothetical download function): +Example of loading root, timestamp and snapshot: ->>> # Load local root ->>> bundle = MetadataBundle("path/to/metadata") +>>> # Load local root (RepositoryErrors here stop the update) +>>> with open(root_path, "rb") as f: +>>> bundle = MetadataBundle(f.read()) >>> ->>> # update root until no more are available from remote +>>> # update root from remote until no more are available >>> with download("root", bundle.root.signed.version + 1) as f: >>> bundle.update_root(f.read()) >>> # ... >>> bundle.root_update_finished() >>> ->>> # load timestamp, then update from remote ->>> bundle.load_local_timestamp() +>>> # load local timestamp, then update from remote +>>> try: +>>> with open(timestamp_path, "rb") as f: +>>> bundle.update_timestamp(f.read()) +>>> except (RepositoryError, OSError): +>>> pass # failure to load a local file is ok +>>> >>> with download("timestamp") as f: >>> bundle.update_timestamp(f.read()) >>> ->>> # load snapshot, update from remote if needed ->>> if not bundle.load_local_snapshot(): ->>> # TODO get version from timestamp +>>> # load local snapshot, then update from remote if needed +>>> try: +>>> with open(snapshot_path, "rb") as f: +>>> bundle.update_snapshot(f.read()) +>>> except (RepositoryError, OSError): +>>> # local snapshot is not valid, load from remote +>>> # (RepositoryErrors here stop the update) >>> with download("snapshot", version) as f: >>> bundle.update_snapshot(f.read()) ->>> ->>> # load local targets, update from remote if needed ->>> if not bundle.load_local_targets(): ->>> # TODO get version from snapshot ->>> with download("targets", version) as f: ->>> bundle.update_targets(f.read()) ->>> ->>> # load local delegated role, update from remote if needed ->>> if not bundle.load_local_delegated_targets("rolename", "targets"): ->>> # TODO get version from snapshot ->>> with download("rolename", version) as f: ->>> bundle.update_targets(f.read(), "rolename", "targets") - TODO: - * 
exceptions are all over the place: the idea is that client could just handle + * exceptions are not final: the idea is that client could just handle a generic RepositoryError that covers every issue that server provided metadata could inflict (other errors would be user errors), but this is not yet the case * usefulness of root_update_finished() can be debated: it could be done in the beginning of load_timestamp()... - * there are some divergences from spec: in general local metadata files are - not deleted (they just won't succesfully load) - * a bit of repetition - * No tests! - * Naming maybe not final? * some metadata interactions might work better in Metadata itself * Progress through Specification update process should be documented (not sure yet how: maybe a spec_logger that logs specification events?) """ import logging -import os from collections import abc from datetime import datetime from typing import Dict, Iterator, Optional @@ -142,23 +125,16 @@ class MetadataBundle(abc.Mapping): update the metadata with the caller making decisions on what is updated. 
""" - def __init__(self, repository_path: str): - """Initialize by loading root metadata from disk""" - self._path = repository_path + def __init__(self, data: bytes): + """Initialize by loading trusted root metadata""" self._bundle = {} # type: Dict[str: Metadata] self.reference_time = datetime.utcnow() self._root_update_finished = False # Load and validate the local root metadata # Valid root metadata is required - logger.debug("Loading local root") - try: - with open(os.path.join(self._path, "root.json"), "rb") as f: - self._load_intermediate_root(f.read()) - except (OSError, exceptions.RepositoryError) as e: - raise exceptions.RepositoryError( - "Failed to load local root metadata" - ) from e + logger.debug("Updating initial trusted root") + self.update_root(data) # Implement Mapping def __getitem__(self, key: str) -> Metadata: @@ -187,110 +163,8 @@ def snapshot(self) -> Optional[Metadata]: def targets(self) -> Optional[Metadata]: return self._bundle.get("targets") - # Public methods + # Methods for updating metadata def update_root(self, data: bytes): - """Update root metadata with data from remote repository.""" - logger.debug("Updating root") - - self._load_intermediate_root(data) - with open(os.path.join(self._path, "root.json"), "wb") as f: - f.write(data) - - def root_update_finished(self): - """Mark root metadata as final.""" - if self._root_update_finished: - raise RuntimeError("Root update is already finished") - - if self.root.signed.is_expired(self.reference_time): - raise exceptions.ExpiredMetadataError("New root.json is expired") - - # We skip specification step 5.3.11: deleting timestamp and snapshot - # with rotated keys is not needed as they will be invalid, are not - # loaded and cannot be loaded - self._root_update_finished = True - logger.debug("Verified final root.json") - - def load_local_timestamp(self): - """Load cached timestamp metadata from local storage.""" - logger.debug("Loading local timestamp") - - try: - with 
open(os.path.join(self._path, "timestamp.json"), "rb") as f: - self._load_timestamp(f.read()) - except OSError as e: - raise exceptions.RepositoryError( - "Failed to load local timestamp" - ) from e - - def update_timestamp(self, data: bytes): - """Update timestamp metadata with data from remote repository.""" - logger.debug("Updating timestamp") - - self._load_timestamp(data) - with open(os.path.join(self._path, "timestamp.json"), "wb") as f: - f.write(data) - - def load_local_snapshot(self): - """Load cached snapshot metadata from local storage.""" - logger.debug("Loading local snapshot") - - try: - with open(os.path.join(self._path, "snapshot.json"), "rb") as f: - self._load_snapshot(f.read()) - except OSError as e: - raise exceptions.RepositoryError( - "Failed to load local snapshot" - ) from e - - def update_snapshot(self, data: bytes): - """Update snapshot metadata with data from remote repository.""" - logger.debug("Updating snapshot") - - self._load_snapshot(data) - with open(os.path.join(self._path, "snapshot.json"), "wb") as f: - f.write(data) - - def load_local_targets(self): - """Load cached targets metadata from local storage.""" - self.load_local_delegated_targets("targets", "root") - - def update_targets(self, data: bytes): - """Update targets metadata with data from remote repository.""" - self.update_delegated_targets(data, "targets", "root") - - def load_local_delegated_targets(self, role_name: str, delegator_name: str): - """Load cached metadata for 'role_name' from local storage. - - Metadata for 'delegator_name' must be loaded already. 
- """ - if self.get(role_name): - logger.debug("Local %s already loaded", role_name) - - logger.debug("Loading local %s", role_name) - - try: - with open(os.path.join(self._path, f"{role_name}.json"), "rb") as f: - self._load_delegated_targets( - f.read(), role_name, delegator_name - ) - except OSError as e: - raise exceptions.RepositoryError( - f"Failed to load local {role_name}" - ) from e - - def update_delegated_targets( - self, data: bytes, role_name: str, delegator_name: str = None - ): - """Update 'rolename' metadata with data from remote repository. - - Metadata for 'delegator_name' must be loaded already.""" - logger.debug("Updating %s", role_name) - - self._load_delegated_targets(data, role_name, delegator_name) - with open(os.path.join(self._path, f"{role_name}.json"), "wb") as f: - f.write(data) - - def _load_intermediate_root(self, data: bytes): """Verifies and loads 'data' as new root metadata. Note that an expired intermediate root is considered valid: expiry is @@ -299,6 +173,7 @@ def _load_intermediate_root(self, data: bytes): raise RuntimeError( "Cannot update root after root update is finished" ) + logger.debug("Updating root") try: new_root = Metadata.from_bytes(data) @@ -328,9 +203,23 @@ def _load_intermediate_root(self, data: bytes): ) self._bundle["root"] = new_root - logger.debug("Loaded root") + logger.debug("Updated root") + + def root_update_finished(self): + """Mark root metadata as final.""" + if self._root_update_finished: + raise RuntimeError("Root update is already finished") + + if self.root.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New root.json is expired") - def _load_timestamp(self, data: bytes): + # We skip specification step 5.3.11: deleting timestamp and snapshot + # with rotated keys is not needed as they will be invalid, are not + # loaded and cannot be loaded + self._root_update_finished = True + logger.debug("Verified final root.json") + + def update_timestamp(self, data: bytes): 
"""Verifies and loads 'data' as new timestamp metadata.""" if not self._root_update_finished: # root_update_finished() not called @@ -377,16 +266,17 @@ def _load_timestamp(self, data: bytes): raise exceptions.ExpiredMetadataError("New timestamp is expired") self._bundle["timestamp"] = new_timestamp - logger.debug("Loaded timestamp") + logger.debug("Updated timestamp") # TODO: remove pylint disable once the hash verification is in metadata.py - def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches + def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches """Verifies and loads 'data' as new snapshot metadata.""" if self.timestamp is None: raise RuntimeError("Cannot update snapshot before timestamp") if self.targets is not None: raise RuntimeError("Cannot update snapshot after targets") + logger.debug("Updating snapshot") meta = self.timestamp.signed.meta["snapshot.json"] @@ -446,9 +336,13 @@ def _load_snapshot(self, data: bytes): # pylint: disable=too-many-branches raise exceptions.ExpiredMetadataError("New snapshot is expired") self._bundle["snapshot"] = new_snapshot - logger.debug("Loaded snapshot") + logger.debug("Updated snapshot") - def _load_delegated_targets( + def update_targets(self, data: bytes): + """Verifies and loads 'data' as new top-level targets metadata.""" + self.update_delegated_targets(data, "targets", "root") + + def update_delegated_targets( self, data: bytes, role_name: str, delegator_name: str ): """Verifies and loads 'data' as new metadata for target 'role_name'. 
@@ -462,6 +356,8 @@ def _load_delegated_targets( if delegator is None: raise RuntimeError("Cannot load targets before delegator") + logger.debug("Updating %s delegated by %s", role_name, delegator_name) + # Verify against the hashes in snapshot, if any meta = self.snapshot.signed.meta.get(f"{role_name}.json") if meta is None: @@ -504,4 +400,4 @@ def _load_delegated_targets( raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") self._bundle[role_name] = new_delegate - logger.debug("Loaded %s delegated by %s", role_name, delegator_name) + logger.debug("Updated %s delegated by %s", role_name, delegator_name) From 377eac18f6313181c77b609dda3e8309d97da7aa Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Mon, 17 May 2021 18:46:05 +0300 Subject: [PATCH 74/86] MetadataBundle: Improve docstrings Document arguments and exceptions, improve prose in general. Remove mention of local file deletion now that file IO is not done here. Signed-off-by: Jussi Kukkonen --- tuf/client_rework/metadata_bundle.py | 96 ++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 19 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 470f750ebc..87d4d0e62c 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -19,6 +19,7 @@ * Metadata must be loaded/updated in order: root -> timestamp -> snapshot -> targets -> (other delegated targets) + Exceptions are raised if metadata fails to load in any way. Example of loading root, timestamp and snapshot: @@ -121,46 +122,63 @@ def verify_with_threshold( class MetadataBundle(abc.Mapping): """Internal class to keep track of valid metadata in Updater - MetadataBundle ensures that metadata is valid. It provides easy ways to - update the metadata with the caller making decisions on what is updated. + MetadataBundle ensures that the collection of metadata in the bundle is + valid. 
It provides easy ways to update the metadata with the caller making + decisions on what is updated. """ - def __init__(self, data: bytes): - """Initialize by loading trusted root metadata""" + def __init__(self, root_data: bytes): + """Initialize bundle by loading trusted root metadata + + Args: + root_data: Trusted root metadata as bytes. Note that this metadata + will only be verified by itself: it is the source of trust for + all metadata in the bundle. + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ self._bundle = {} # type: Dict[str: Metadata] self.reference_time = datetime.utcnow() self._root_update_finished = False - # Load and validate the local root metadata - # Valid root metadata is required + # Load and validate the local root metadata. Valid initial trusted root + # metadata is required logger.debug("Updating initial trusted root") - self.update_root(data) + self.update_root(root_data) - # Implement Mapping - def __getitem__(self, key: str) -> Metadata: - return self._bundle[key] + def __getitem__(self, role: str) -> Metadata: + """Returns current Metadata for 'role'""" + return self._bundle[role] def __len__(self) -> int: + """Returns number of Metadata objects in bundle""" return len(self._bundle) def __iter__(self) -> Iterator[Metadata]: + """Returns iterator over all Metadata objects in bundle""" return iter(self._bundle) # Helper properties for top level metadata @property def root(self) -> Optional[Metadata]: + """Current root Metadata or None""" return self._bundle.get("root") @property def timestamp(self) -> Optional[Metadata]: + """Current timestamp Metadata or None""" return self._bundle.get("timestamp") @property def snapshot(self) -> Optional[Metadata]: + """Current snapshot Metadata or None""" return self._bundle.get("snapshot") @property def targets(self) -> Optional[Metadata]: + """Current targets Metadata or None""" return self._bundle.get("targets") 
# Methods for updating metadata @@ -168,7 +186,15 @@ def update_root(self, data: bytes): """Verifies and loads 'data' as new root metadata. Note that an expired intermediate root is considered valid: expiry is - only checked for the final root in root_update_finished().""" + only checked for the final root in root_update_finished(). + + Args: + data: unverified new root metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ if self._root_update_finished: raise RuntimeError( "Cannot update root after root update is finished" @@ -206,21 +232,30 @@ def update_root(self, data: bytes): logger.debug("Updated root") def root_update_finished(self): - """Mark root metadata as final.""" + """Marks root metadata as final and verifies it is not expired + + Raises: + ExpiredMetadataError: The final root metadata is expired. + """ if self._root_update_finished: raise RuntimeError("Root update is already finished") if self.root.signed.is_expired(self.reference_time): raise exceptions.ExpiredMetadataError("New root.json is expired") - # We skip specification step 5.3.11: deleting timestamp and snapshot - # with rotated keys is not needed as they will be invalid, are not - # loaded and cannot be loaded self._root_update_finished = True logger.debug("Verified final root.json") def update_timestamp(self, data: bytes): - """Verifies and loads 'data' as new timestamp metadata.""" + """Verifies and loads 'data' as new timestamp metadata. + + Args: + data: unverified new timestamp metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
+ """ if not self._root_update_finished: # root_update_finished() not called raise RuntimeError("Cannot update timestamp before root") @@ -270,7 +305,15 @@ def update_timestamp(self, data: bytes): # TODO: remove pylint disable once the hash verification is in metadata.py def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches - """Verifies and loads 'data' as new snapshot metadata.""" + """Verifies and loads 'data' as new snapshot metadata. + + Args: + data: unverified new snapshot metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ if self.timestamp is None: raise RuntimeError("Cannot update snapshot before timestamp") @@ -339,7 +382,15 @@ def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches logger.debug("Updated snapshot") def update_targets(self, data: bytes): - """Verifies and loads 'data' as new top-level targets metadata.""" + """Verifies and loads 'data' as new top-level targets metadata. + + Args: + data: unverified new targets metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ self.update_delegated_targets(data, "targets", "root") def update_delegated_targets( @@ -347,7 +398,14 @@ def update_delegated_targets( ): """Verifies and loads 'data' as new metadata for target 'role_name'. - Raises if verification fails + Args: + data: unverified new metadata as bytes + role_name: The role name of the new metadata + delegator_name: The name of the role delegating the new metadata + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
""" if self.snapshot is None: raise RuntimeError("Cannot load targets before snapshot") From 1d45c2aa2c478cd8644433ec6edf827d0b408dad Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Mon, 17 May 2021 12:23:55 +0300 Subject: [PATCH 75/86] New Updater: use the MetadataBundle Use the MetadataBundle to verify metadata validity. * Updater now handles reading metadata files (from filesystem as well as network) * Updater feeds bytes to MetadataBundle for verification * Updater persists data on disk after it has been verified Signed-off-by: Jussi Kukkonen --- tests/test_updater_rework.py | 17 +- tuf/client_rework/metadata_wrapper.py | 183 ----------- tuf/client_rework/updater_rework.py | 430 ++++++-------------------- 3 files changed, 104 insertions(+), 526 deletions(-) delete mode 100644 tuf/client_rework/metadata_wrapper.py diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py index b564fbf57e..4522f1a5c9 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_rework.py @@ -92,7 +92,7 @@ def setUp(self): # for each test case. original_repository = os.path.join(original_repository_files, 'repository') original_keystore = os.path.join(original_repository_files, 'keystore') - original_client = os.path.join(original_repository_files, 'client') + original_client = os.path.join(original_repository_files, 'client', 'test_repository1', 'metadata', 'current') # Save references to the often-needed client repository directories. # Test cases need these references to access metadata and target files. 
@@ -101,12 +101,7 @@ def setUp(self): self.keystore_directory = \ os.path.join(temporary_repository_root, 'keystore') - self.client_directory = os.path.join(temporary_repository_root, - 'client') - self.client_metadata = os.path.join(self.client_directory, - self.repository_name, 'metadata') - self.client_metadata_current = os.path.join(self.client_metadata, - 'current') + self.client_directory = os.path.join(temporary_repository_root, 'client') # Copy the original 'repository', 'client', and 'keystore' directories # to the temporary repository the test cases can use. @@ -119,15 +114,11 @@ def setUp(self): url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath - # Setting 'tuf.settings.repository_directory' with the temporary client - # directory copied from the original repository files. - tuf.settings.repositories_directory = self.client_directory - metadata_url = f"{url_prefix}/metadata/" targets_url = f"{url_prefix}/targets/" # Creating a repository instance. The test cases will use this client # updater to refresh metadata, fetch target files, etc. 
- self.repository_updater = updater.Updater(self.repository_name, + self.repository_updater = updater.Updater(self.client_directory, metadata_url, targets_url) @@ -154,7 +145,7 @@ def test_refresh(self): for role in ['root', 'timestamp', 'snapshot', 'targets']: metadata_obj = metadata.Metadata.from_file(os.path.join( - self.client_metadata_current, role + '.json')) + self.client_directory, role + '.json')) metadata_obj_2 = metadata.Metadata.from_file(os.path.join( self.repository_directory, 'metadata', role + '.json')) diff --git a/tuf/client_rework/metadata_wrapper.py b/tuf/client_rework/metadata_wrapper.py deleted file mode 100644 index fbc3335c3e..0000000000 --- a/tuf/client_rework/metadata_wrapper.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2021, New York University and the TUF contributors -# SPDX-License-Identifier: MIT OR Apache-2.0 - -"""Metadata wrapper -""" -import time - -from securesystemslib.keys import format_metadata_to_key - -from tuf import exceptions, formats -from tuf.api import metadata - - -class MetadataWrapper: - """Helper classes extending or adding missing - functionality to metadata API - """ - - def __init__(self, meta): - self._meta = meta - - @classmethod - def from_json_object(cls, raw_data): - """Loads JSON-formatted TUF metadata from a file object.""" - # Use local scope import to avoid circular import errors - # pylint: disable=import-outside-toplevel - from tuf.api.serialization.json import JSONDeserializer - - deserializer = JSONDeserializer() - meta = deserializer.deserialize(raw_data) - return cls(meta=meta) - - @classmethod - def from_json_file(cls, filename): - """Loads JSON-formatted TUF metadata from a file.""" - meta = metadata.Metadata.from_file(filename) - return cls(meta=meta) - - @property - def signed(self): - """ - TODO - """ - return self._meta.signed - - @property - def version(self): - """ - TODO - """ - return self._meta.signed.version - - def verify(self, keys, threshold): - """ - 
TODO - """ - verified = 0 - # 1.3. Check signatures - for key in keys: - self._meta.verify(key) - verified += 1 - - if verified < threshold: - raise exceptions.InsufficientKeysError - - def persist(self, filename): - """ - TODO - """ - self._meta.to_file(filename) - - def expires(self, reference_time=None): - """ - TODO - """ - if reference_time is None: - expires_timestamp = formats.datetime_to_unix_timestamp( - self._meta.signed.expires - ) - reference_time = int(time.time()) - - if expires_timestamp < reference_time: - raise exceptions.ExpiredMetadataError - - -class RootWrapper(MetadataWrapper): - """ - TODO - """ - - def keys(self, role): - """ - TODO - """ - keys = [] - for keyid in self._meta.signed.roles[role].keyids: - key_metadata = self._meta.signed.keys[keyid].to_dict() - key, dummy = format_metadata_to_key(key_metadata) - keys.append(key) - - return keys - - def threshold(self, role): - """ - TODO - """ - return self._meta.signed.roles[role].threshold - - -class TimestampWrapper(MetadataWrapper): - """ - TODO - """ - - @property - def snapshot(self): - """ - TODO - """ - return self._meta.signed.meta["snapshot.json"] - - -class SnapshotWrapper(MetadataWrapper): - """ - TODO - """ - - def role(self, name): - """ - TODO - """ - return self._meta.signed.meta[name + ".json"] - - -class TargetsWrapper(MetadataWrapper): - """ - TODO - """ - - @property - def targets(self): - """ - TODO - """ - return self._meta.signed.targets - - @property - def delegations(self): - """ - TODO - """ - return self._meta.signed.delegations - - def keys(self, role): - """ - TODO - """ - keys = [] - if self._meta.signed.delegations is not None: - for delegation in self._meta.signed.delegations.roles: - if delegation.name == role: - for keyid in delegation.keyids: - key_metadata = self._meta.signed.delegations.keys[keyid] - key, dummy = format_metadata_to_key( - key_metadata.to_dict() - ) - keys.append(key) - return keys - - return keys - - def threshold(self, role): - """ - TODO 
- """ - if self._meta.signed.delegations is not None: - for delegation in self._meta.signed.delegations.roles: - if delegation.name == role: - return delegation.threshold - - return None diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index fbf3778ee7..4b326dc5e7 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -17,18 +17,18 @@ from securesystemslib import hash as sslib_hash from securesystemslib import util as sslib_util -from tuf import exceptions, settings +from tuf import exceptions from tuf.client.fetcher import FetcherInterface -from tuf.client_rework import download, requests_fetcher - -from .metadata_wrapper import ( - RootWrapper, - SnapshotWrapper, - TargetsWrapper, - TimestampWrapper, -) +from tuf.client_rework import download, metadata_bundle, requests_fetcher # Globals +MAX_ROOT_ROTATIONS = 32 +MAX_DELEGATIONS = 32 +DEFAULT_ROOT_MAX_LENGTH = 512000 # bytes +DEFAULT_TIMESTAMP_MAX_LENGTH = 16384 # bytes +DEFAULT_SNAPSHOT_MAX_LENGTH = 2000000 # bytes +DEFAULT_TARGETS_MAX_LENGTH = 5000000 # bytes + logger = logging.getLogger(__name__) # Classes @@ -41,29 +41,30 @@ class Updater: def __init__( self, - repository_name: str, + repository_dir: str, metadata_base_url: str, target_base_url: Optional[str] = None, fetcher: Optional[FetcherInterface] = None, ): """ Args: - repository_name: directory name (within a local directory - defined by 'tuf.settings.repositories_directory') + repository_dir: Local metadata directory. Must contain root.json metadata_base_url: Base URL for all remote metadata downloads target_base_url: Optional; Default base URL for all remote target downloads. Can be individually set in download_target() fetcher: Optional; FetcherInterface implementation used to download both metadata and targets. 
Default is RequestsFetcher """ - self._repository_name = repository_name + self._dir = repository_dir self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) if target_base_url is None: self._target_base_url = None else: self._target_base_url = _ensure_trailing_slash(target_base_url) - self._consistent_snapshot = False - self._metadata = {} + + # Read trusted local root metadata + data = self._load_local_metadata("root") + self._bundle = metadata_bundle.MetadataBundle(data) if fetcher is None: self._fetcher = requests_fetcher.RequestsFetcher() @@ -195,318 +196,111 @@ def download_target( ) sslib_util.persist_temp_file(target_file, filepath) - def _get_full_meta_name( - self, role: str, extension: str = ".json", version: int = None - ) -> str: - """ - Helper method returning full metadata file path given the role name - and file extension. - """ + def _download_metadata( + self, rolename: str, length: int, version: Optional[int] = None + ) -> bytes: + """download a metadata file and return it as bytes""" if version is None: - filename = role + extension + filename = f"{rolename}.json" else: - filename = str(version) + "." + role + extension - return os.path.join( - settings.repositories_directory, - self._repository_name, - "metadata", - "current", - filename, + filename = f"{version}.{rolename}.json" + url = parse.urljoin(self._metadata_base_url, filename) + return download.download_bytes( + url, + length, + self._fetcher, + strict_required_length=False, ) + def _load_local_metadata(self, rolename: str) -> bytes: + with open(os.path.join(self._dir, f"{rolename}.json"), "rb") as f: + return f.read() + + def _persist_metadata(self, rolename: str, data: bytes): + with open(os.path.join(self._dir, f"{rolename}.json"), "wb") as f: + f.write(data) + def _load_root(self) -> None: - """ - If metadata file for 'root' role does not exist locally, download it - over a network, verify it and store it permanently. - """ + """Load remote root metadata. 
- # Load trusted root metadata - # TODO: this should happen much earlier, on Updater.__init__ - self._metadata["root"] = RootWrapper.from_json_file( - self._get_full_meta_name("root") - ) + Sequentially load and persist on local disk every newer root metadata + version available on the remote. + """ # Update the root role - # 1.1. Let N denote the version number of the trusted - # root metadata file. - lower_bound = self._metadata["root"].version - upper_bound = lower_bound + settings.MAX_NUMBER_ROOT_ROTATIONS - intermediate_root = None + lower_bound = self._bundle.root.signed.version + 1 + upper_bound = lower_bound + MAX_ROOT_ROTATIONS for next_version in range(lower_bound, upper_bound): try: - root_url = parse.urljoin( - self._metadata_base_url, f"{next_version}.root.json" - ) - # For each version of root iterate over the list of mirrors - # until an intermediate root is successfully downloaded and - # verified. - data = download.download_bytes( - root_url, - settings.DEFAULT_ROOT_REQUIRED_LENGTH, - self._fetcher, - strict_required_length=False, + data = self._download_metadata( + "root", DEFAULT_ROOT_MAX_LENGTH, next_version ) - - intermediate_root = self._verify_root(data) - # TODO: persist should happen here for each intermediate - # root according to the spec + self._bundle.update_root(data) + self._persist_metadata("root", data) except exceptions.FetcherHTTPError as exception: if exception.status_code not in {403, 404}: raise - # Stop looking for a bigger version if "File not found" - # error is received + # 404/403 means current root is newest available break - if intermediate_root: - # Check for a freeze attack. The latest known time MUST be lower - # than the expiration timestamp in the trusted root metadata file - # TODO define which exceptions are part of the public API - intermediate_root.expires() - - # 1.9. If the timestamp and / or snapshot keys have been rotated, - # then delete the trusted timestamp and snapshot metadata files. 
- if self._metadata["root"].keys( - "timestamp" - ) != intermediate_root.keys("timestamp"): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name("timestamp")) - self._metadata["timestamp"] = {} - - if self._metadata["root"].keys( - "snapshot" - ) != intermediate_root.keys("snapshot"): - # FIXME: use abstract storage - os.remove(self._get_full_meta_name("snapshot")) - self._metadata["snapshot"] = {} - - # Set the trusted root metadata file to the new root - # metadata file - self._metadata["root"] = intermediate_root - # Persist root metadata. The client MUST write the file to - # non-volatile storage as FILENAME.EXT (e.g. root.json). - self._metadata["root"].persist(self._get_full_meta_name("root")) - - # 1.10. Set whether consistent snapshots are used as per - # the trusted root metadata file - self._consistent_snapshot = self._metadata[ - "root" - ].signed.consistent_snapshot + # Verify final root + self._bundle.root_update_finished() def _load_timestamp(self) -> None: - """ - TODO - """ - # TODO Check if timestamp exists locally - timestamp_url = parse.urljoin(self._metadata_base_url, "timestamp.json") - data = download.download_bytes( - timestamp_url, - settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._fetcher, - strict_required_length=False, - ) - self._metadata["timestamp"] = self._verify_timestamp(data) - self._metadata["timestamp"].persist( - self._get_full_meta_name("timestamp.json") + """Load local and remote timestamp metadata""" + try: + data = self._load_local_metadata("timestamp") + self._bundle.update_timestamp(data) + except (OSError, exceptions.RepositoryError) as e: + # Local load can fail: it's not fatal + logger.debug("Failed to load local timestamp %s", e) + + # Load from remote (whether local load succeeded or not) + data = self._download_metadata( + "timestamp", DEFAULT_TIMESTAMP_MAX_LENGTH ) + self._bundle.update_timestamp(data) + self._persist_metadata("timestamp", data) def _load_snapshot(self) -> None: - """ - TODO - """ 
+ """Load local (and if needed remote) snapshot metadata""" try: - length = self._metadata["timestamp"].snapshot["length"] - except KeyError: - length = settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH - - # Uncomment when implementing consistent_snapshot - # if self._consistent_snapshot: - # version = self._metadata["timestamp"].snapshot["version"] - # else: - # version = None - - # TODO: Check if exists locally - snapshot_url = parse.urljoin(self._metadata_base_url, "snapshot.json") - data = download.download_bytes( - snapshot_url, - length, - self._fetcher, - strict_required_length=False, - ) - - self._metadata["snapshot"] = self._verify_snapshot(data) - self._metadata["snapshot"].persist( - self._get_full_meta_name("snapshot.json") - ) - - def _load_targets(self, targets_role: str, parent_role: str) -> None: - """ - TODO - """ + data = self._load_local_metadata("snapshot") + self._bundle.update_snapshot(data) + except (OSError, exceptions.RepositoryError) as e: + # Local load failed: we must update from remote + logger.debug("Failed to load local snapshot %s", e) + + metainfo = self._bundle.timestamp.signed.meta["snapshot.json"] + length = metainfo.get("length") or DEFAULT_SNAPSHOT_MAX_LENGTH + version = None + if self._bundle.root.signed.consistent_snapshot: + version = metainfo["version"] + + data = self._download_metadata("snapshot", length, version) + self._bundle.update_snapshot(data) + self._persist_metadata("snapshot", data) + + def _load_targets(self, role: str, parent_role: str) -> None: + """Load local (and if needed remote) metadata for 'role'.""" try: - length = self._metadata["snapshot"].role(targets_role)["length"] - except KeyError: - length = settings.DEFAULT_TARGETS_REQUIRED_LENGTH - - # Uncomment when implementing consistent_snapshot - # if self._consistent_snapshot: - # version = self._metadata["snapshot"].role(targets_role)["version"] - # else: - # version = None - - # TODO: Check if exists locally - - targets_url = parse.urljoin( - 
self._metadata_base_url, f"{targets_role}.json" - ) - data = download.download_bytes( - targets_url, - length, - self._fetcher, - strict_required_length=False, - ) - - self._metadata[targets_role] = self._verify_targets( - data, targets_role, parent_role - ) - self._metadata[targets_role].persist( - self._get_full_meta_name(targets_role, extension=".json") - ) - - def _verify_root(self, file_content: bytes) -> RootWrapper: - """ - TODO - """ - - intermediate_root = RootWrapper.from_json_object(file_content) - - # Check for an arbitrary software attack - trusted_root = self._metadata["root"] - intermediate_root.verify( - trusted_root.keys("root"), trusted_root.threshold("root") - ) - intermediate_root.verify( - intermediate_root.keys("root"), intermediate_root.threshold("root") - ) - - # Check for a rollback attack. - if intermediate_root.version < trusted_root.version: - raise exceptions.ReplayedMetadataError( - "root", intermediate_root.version(), trusted_root.version() - ) - # Note that the expiration of the new (intermediate) root metadata - # file does not matter yet, because we will check for it in step 1.8. - - return intermediate_root - - def _verify_timestamp(self, file_content: bytes) -> TimestampWrapper: - """ - TODO - """ - intermediate_timestamp = TimestampWrapper.from_json_object(file_content) - - # Check for an arbitrary software attack - trusted_root = self._metadata["root"] - intermediate_timestamp.verify( - trusted_root.keys("timestamp"), trusted_root.threshold("timestamp") - ) - - # Check for a rollback attack. 
- if self._metadata.get("timestamp"): - if ( - intermediate_timestamp.signed.version - <= self._metadata["timestamp"].version - ): - raise exceptions.ReplayedMetadataError( - "root", - intermediate_timestamp.version(), - self._metadata["timestamp"].version(), - ) - - if self._metadata.get("snapshot"): - if ( - intermediate_timestamp.snapshot.version - <= self._metadata["timestamp"].snapshot["version"] - ): - raise exceptions.ReplayedMetadataError( - "root", - intermediate_timestamp.snapshot.version(), - self._metadata["snapshot"].version(), - ) - - intermediate_timestamp.expires() - - return intermediate_timestamp - - def _verify_snapshot(self, file_content: bytes) -> SnapshotWrapper: - """ - TODO - """ - - # Check against timestamp metadata - if self._metadata["timestamp"].snapshot.get("hash"): - _check_hashes( - file_content, self._metadata["timestamp"].snapshot.get("hash") - ) - - intermediate_snapshot = SnapshotWrapper.from_json_object(file_content) - - if ( - intermediate_snapshot.version - != self._metadata["timestamp"].snapshot["version"] - ): - raise exceptions.BadVersionNumberError - - # Check for an arbitrary software attack - trusted_root = self._metadata["root"] - intermediate_snapshot.verify( - trusted_root.keys("snapshot"), trusted_root.threshold("snapshot") - ) - - # Check for a rollback attack - if self._metadata.get("snapshot"): - for target_role in intermediate_snapshot.signed.meta: - if ( - target_role["version"] - != self._metadata["snapshot"].meta[target_role]["version"] - ): - raise exceptions.BadVersionNumberError - - intermediate_snapshot.expires() - - return intermediate_snapshot - - def _verify_targets( - self, file_content: bytes, filename: str, parent_role: str - ) -> TargetsWrapper: - """ - TODO - """ - - # Check against timestamp metadata - if self._metadata["snapshot"].role(filename).get("hash"): - _check_hashes( - file_content, self._metadata["snapshot"].targets.get("hash") - ) - - intermediate_targets = 
TargetsWrapper.from_json_object(file_content) - if ( - intermediate_targets.version - != self._metadata["snapshot"].role(filename)["version"] - ): - raise exceptions.BadVersionNumberError - - # Check for an arbitrary software attack - parent_role = self._metadata[parent_role] - - intermediate_targets.verify( - parent_role.keys(filename), parent_role.threshold(filename) - ) - - intermediate_targets.expires() - - return intermediate_targets + data = self._load_local_metadata(role) + self._bundle.update_delegated_targets(data, role, parent_role) + except (OSError, exceptions.RepositoryError) as e: + # Local load failed: we must update from remote + logger.debug("Failed to load local %s: %s", role, e) + + metainfo = self._bundle.snapshot.signed.meta[f"{role}.json"] + length = metainfo.get("length") or DEFAULT_TARGETS_MAX_LENGTH + version = None + if self._bundle.root.signed.consistent_snapshot: + version = metainfo["version"] + + data = self._download_metadata(role, length, version) + self._bundle.update_delegated_targets(data, role, parent_role) + self._persist_metadata(role, data) def _preorder_depth_first_walk(self, target_filepath) -> Dict: """ @@ -516,7 +310,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: target = None role_names = [("targets", "root")] visited_role_names = set() - number_of_delegations = settings.MAX_NUMBER_OF_DELEGATIONS + number_of_delegations = MAX_DELEGATIONS # Ensure the client has the most up-to-date version of 'targets.json'. # Raise 'exceptions.NoWorkingMirrorError' if the changed metadata @@ -542,14 +336,13 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # The metadata for 'role_name' must be downloaded/updated before # its targets, delegations, and child roles can be inspected. - # self._metadata['current'][role_name] is currently missing. 
# _refresh_targets_metadata() does not refresh 'targets.json', it # expects _update_metadata_if_changed() to have already refreshed # it, which this function has checked above. # self._refresh_targets_metadata(role_name, # refresh_all_delegated_roles=False) - role_metadata = self._metadata[role_name] + role_metadata = self._bundle[role_name].signed target = role_metadata.targets.get(target_filepath) # After preorder check, add current role to set of visited roles. @@ -610,10 +403,8 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: and len(role_names) > 0 ): msg = ( - f"{len(role_names)} roles left to visit, ", - "but allowed to visit at most ", - f"{settings.MAX_NUMBER_OF_DELEGATIONS}", - " delegations.", + f"{len(role_names)} roles left to visit, but allowed to ", + f"visit at most {MAX_DELEGATIONS} delegations.", ) logger.debug(msg) @@ -749,27 +540,6 @@ def _check_hashes_obj(file_object, trusted_hashes): ) -def _check_hashes(file_content, trusted_hashes): - """ - TODO - """ - # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply - # return. - for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = sslib_hash.digest(algorithm) - - digest_object.update(file_content) - computed_hash = digest_object.hexdigest() - - # Raise an exception if any of the hashes are incorrect. - if trusted_hash != computed_hash: - raise exceptions.BadHashError(trusted_hash, computed_hash) - - logger.info( - "The file's " + algorithm + " hash is" " correct: " + trusted_hash - ) - - def _get_filepath_hash(target_filepath, hash_function="sha256"): """ TODO From 2bbf5bc178cd947468de8c65b4fcc184fdce5f45 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Mon, 17 May 2021 12:44:42 +0300 Subject: [PATCH 76/86] tests: Cleanup in new Updater tests Add test for a refresh with just a local root.json. Remove unused code. Add docstrings for raised exceptions, add TODOs for the missing exception handling. 
Signed-off-by: Jussi Kukkonen --- tests/test_updater_rework.py | 83 ++++------------------------- tuf/client_rework/updater_rework.py | 23 +++++++- 2 files changed, 31 insertions(+), 75 deletions(-) diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py index 4522f1a5c9..5f1059423c 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_rework.py @@ -122,13 +122,6 @@ def setUp(self): metadata_url, targets_url) - # Metadata role keys are needed by the test cases to make changes to the - # repository (e.g., adding a new target file to 'targets.json' and then - # requesting a refresh()). - self.role_keys = _load_role_keys(self.keystore_directory) - - - def tearDown(self): # We are inheriting from custom class. unittest_toolbox.Modified_TestCase.tearDown(self) @@ -136,24 +129,10 @@ def tearDown(self): # Logs stdout and stderr from the sever subprocess. self.server_process_handler.flush_log() - - - # UNIT TESTS. def test_refresh(self): self.repository_updater.refresh() - for role in ['root', 'timestamp', 'snapshot', 'targets']: - metadata_obj = metadata.Metadata.from_file(os.path.join( - self.client_directory, role + '.json')) - - metadata_obj_2 = metadata.Metadata.from_file(os.path.join( - self.repository_directory, 'metadata', role + '.json')) - - - self.assertDictEqual(metadata_obj.to_dict(), - metadata_obj_2.to_dict()) - # Get targetinfo for 'file1.txt' listed in targets targetinfo1 = self.repository_updater.get_one_valid_targetinfo('file1.txt') # Get targetinfo for 'file3.txt' listed in the delegated role1 @@ -178,60 +157,16 @@ def test_refresh(self): self.assertListEqual(updated_targets, []) + def test_refresh_with_only_local_root(self): + os.remove(os.path.join(self.client_directory, "timestamp.json")) + os.remove(os.path.join(self.client_directory, "snapshot.json")) + os.remove(os.path.join(self.client_directory, "targets.json")) + os.remove(os.path.join(self.client_directory, "role1.json")) + + self.repository_updater.refresh() -def 
_load_role_keys(keystore_directory): - - # Populating 'self.role_keys' by importing the required public and private - # keys of 'tuf/tests/repository_data/'. The role keys are needed when - # modifying the remote repository used by the test cases in this unit test. - - # The pre-generated key files in 'repository_data/keystore' are all encrypted with - # a 'password' passphrase. - EXPECTED_KEYFILE_PASSWORD = 'password' - - # Store and return the cryptography keys of the top-level roles, including 1 - # delegated role. - role_keys = {} - - root_key_file = os.path.join(keystore_directory, 'root_key') - targets_key_file = os.path.join(keystore_directory, 'targets_key') - snapshot_key_file = os.path.join(keystore_directory, 'snapshot_key') - timestamp_key_file = os.path.join(keystore_directory, 'timestamp_key') - delegation_key_file = os.path.join(keystore_directory, 'delegation_key') - - role_keys = {'root': {}, 'targets': {}, 'snapshot': {}, 'timestamp': {}, - 'role1': {}} - - # Import the top-level and delegated role public keys. - role_keys['root']['public'] = \ - repo_tool.import_rsa_publickey_from_file(root_key_file+'.pub') - role_keys['targets']['public'] = \ - repo_tool.import_ed25519_publickey_from_file(targets_key_file+'.pub') - role_keys['snapshot']['public'] = \ - repo_tool.import_ed25519_publickey_from_file(snapshot_key_file+'.pub') - role_keys['timestamp']['public'] = \ - repo_tool.import_ed25519_publickey_from_file(timestamp_key_file+'.pub') - role_keys['role1']['public'] = \ - repo_tool.import_ed25519_publickey_from_file(delegation_key_file+'.pub') - - # Import the private keys of the top-level and delegated roles. 
- role_keys['root']['private'] = \ - repo_tool.import_rsa_privatekey_from_file(root_key_file, - EXPECTED_KEYFILE_PASSWORD) - role_keys['targets']['private'] = \ - repo_tool.import_ed25519_privatekey_from_file(targets_key_file, - EXPECTED_KEYFILE_PASSWORD) - role_keys['snapshot']['private'] = \ - repo_tool.import_ed25519_privatekey_from_file(snapshot_key_file, - EXPECTED_KEYFILE_PASSWORD) - role_keys['timestamp']['private'] = \ - repo_tool.import_ed25519_privatekey_from_file(timestamp_key_file, - EXPECTED_KEYFILE_PASSWORD) - role_keys['role1']['private'] = \ - repo_tool.import_ed25519_privatekey_from_file(delegation_key_file, - EXPECTED_KEYFILE_PASSWORD) - - return role_keys + # Get targetinfo for 'file3.txt' listed in the delegated role1 + targetinfo3= self.repository_updater.get_one_valid_targetinfo('file3.txt') if __name__ == '__main__': utils.configure_test_logging(sys.argv) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 4b326dc5e7..d31af0b6ee 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -48,12 +48,17 @@ def __init__( ): """ Args: - repository_dir: Local metadata directory. Must contain root.json + repository_dir: Local metadata directory. Directory must be + writable and it must contain at least a root.json file. metadata_base_url: Base URL for all remote metadata downloads target_base_url: Optional; Default base URL for all remote target downloads. Can be individually set in download_target() fetcher: Optional; FetcherInterface implementation used to download both metadata and targets. Default is RequestsFetcher + + Raises: + OSError: Local root.json cannot be read + RepositoryError: Local root.json is invalid """ self._dir = repository_dir self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) @@ -83,6 +88,11 @@ def refresh(self) -> None: The refresh() method should be called by the client before any target requests. 
+ + Raises: + OSError: New metadata could not be written to disk + RepositoryError: Metadata failed to verify in some way + TODO: download-related errors """ self._load_root() @@ -102,6 +112,11 @@ def get_one_valid_targetinfo(self, target_path: str) -> Dict: (https://url.spec.whatwg.org/#path-relative-url-string). Typically this is also the unix file path of the eventually downloaded file. + + Raises: + OSError: New metadata could not be written to disk + RepositoryError: Metadata failed to verify in some way + TODO: download-related errors """ return self._preorder_depth_first_walk(target_path) @@ -172,6 +187,10 @@ def download_target( destination_directory as required. target_base_url: Optional; Base URL used to form the final target download URL. Default is the value provided in Updater() + + Raises: + TODO: download-related errors + TODO: file write errors """ if target_base_url is None and self._target_base_url is None: raise ValueError( @@ -269,6 +288,7 @@ def _load_snapshot(self) -> None: try: data = self._load_local_metadata("snapshot") self._bundle.update_snapshot(data) + logger.debug("Local snapshot is valid: not downloading new one") except (OSError, exceptions.RepositoryError) as e: # Local load failed: we must update from remote logger.debug("Failed to load local snapshot %s", e) @@ -288,6 +308,7 @@ def _load_targets(self, role: str, parent_role: str) -> None: try: data = self._load_local_metadata(role) self._bundle.update_delegated_targets(data, role, parent_role) + logger.debug("Local %s is valid: not downloading new one", role) except (OSError, exceptions.RepositoryError) as e: # Local load failed: we must update from remote logger.debug("Failed to load local %s: %s", role, e) From d35fc27b0a4f424db76672086809ef37bd0b2e64 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 19 May 2021 19:14:03 +0300 Subject: [PATCH 77/86] New updater: Update comments Mostly remove comments that provide little value after all the changes. 
Also remove an unused variable from a test. Signed-off-by: Jussi Kukkonen --- tests/test_updater_rework.py | 4 +--- tuf/client_rework/updater_rework.py | 19 +++---------------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/tests/test_updater_rework.py b/tests/test_updater_rework.py index 5f1059423c..3ea9d08622 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_rework.py @@ -79,8 +79,6 @@ def setUp(self): # We are inheriting from custom class. unittest_toolbox.Modified_TestCase.setUp(self) - self.repository_name = 'test_repository1' - # Copy the original repository files provided in the test folder so that # any modifications made to repository files are restricted to the copies. # The 'repository_data' directory is expected to exist in 'tuf.tests/'. @@ -130,7 +128,7 @@ def tearDown(self): self.server_process_handler.flush_log() def test_refresh(self): - + # All metadata is in local directory already self.repository_updater.refresh() # Get targetinfo for 'file1.txt' listed in targets diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index d31af0b6ee..470c8a14f9 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -273,7 +273,7 @@ def _load_timestamp(self) -> None: data = self._load_local_metadata("timestamp") self._bundle.update_timestamp(data) except (OSError, exceptions.RepositoryError) as e: - # Local load can fail: it's not fatal + # Local timestamp does not exist or is invalid logger.debug("Failed to load local timestamp %s", e) # Load from remote (whether local load succeeded or not) @@ -290,7 +290,7 @@ def _load_snapshot(self) -> None: self._bundle.update_snapshot(data) logger.debug("Local snapshot is valid: not downloading new one") except (OSError, exceptions.RepositoryError) as e: - # Local load failed: we must update from remote + # Local snapshot does not exist or is invalid: update from remote logger.debug("Failed to load local snapshot %s", e) 
metainfo = self._bundle.timestamp.signed.meta["snapshot.json"] @@ -310,7 +310,7 @@ def _load_targets(self, role: str, parent_role: str) -> None: self._bundle.update_delegated_targets(data, role, parent_role) logger.debug("Local %s is valid: not downloading new one", role) except (OSError, exceptions.RepositoryError) as e: - # Local load failed: we must update from remote + # Local 'role' does not exist or is invalid: update from remote logger.debug("Failed to load local %s: %s", role, e) metainfo = self._bundle.snapshot.signed.meta[f"{role}.json"] @@ -333,14 +333,6 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: visited_role_names = set() number_of_delegations = MAX_DELEGATIONS - # Ensure the client has the most up-to-date version of 'targets.json'. - # Raise 'exceptions.NoWorkingMirrorError' if the changed metadata - # cannot be successfully downloaded and - # 'exceptions.RepositoryError' if the referenced metadata is - # missing. Target methods such as this one are called after the - # top-level metadata have been refreshed (i.e., updater.refresh()). - # self._update_metadata_if_changed('targets') - # Preorder depth-first traversal of the graph of target delegations. while ( target is None and number_of_delegations > 0 and len(role_names) > 0 @@ -357,11 +349,6 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # The metadata for 'role_name' must be downloaded/updated before # its targets, delegations, and child roles can be inspected. - # _refresh_targets_metadata() does not refresh 'targets.json', it - # expects _update_metadata_if_changed() to have already refreshed - # it, which this function has checked above. 
- # self._refresh_targets_metadata(role_name, - # refresh_all_delegated_roles=False) role_metadata = self._bundle[role_name].signed target = role_metadata.targets.get(target_filepath) From 38ce08314949744c3866c10c5f4085004992f5ae Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 18:26:49 +0300 Subject: [PATCH 78/86] Add fixes after merging develop Fixes incorrect access to MetaFile and TargetFile after the merge of develop into experimental-client branch. Signed-off-by: Teodora Sechkova --- tuf/client_rework/metadata_bundle.py | 24 ++++++++++++------------ tuf/client_rework/updater_rework.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/client_rework/metadata_bundle.py index 87d4d0e62c..ac5babe60b 100644 --- a/tuf/client_rework/metadata_bundle.py +++ b/tuf/client_rework/metadata_bundle.py @@ -287,14 +287,14 @@ def update_timestamp(self, data: bytes): ) # Prevent rolling back snapshot version if ( - new_timestamp.signed.meta["snapshot.json"]["version"] - < self.timestamp.signed.meta["snapshot.json"]["version"] + new_timestamp.signed.meta["snapshot.json"].version + < self.timestamp.signed.meta["snapshot.json"].version ): # TODO not sure about the correct exception here raise exceptions.ReplayedMetadataError( "snapshot", - new_timestamp.signed.meta["snapshot.json"]["version"], - self.timestamp.signed.meta["snapshot.json"]["version"], + new_timestamp.signed.meta["snapshot.json"].version, + self.timestamp.signed.meta["snapshot.json"].version, ) if new_timestamp.signed.is_expired(self.reference_time): @@ -324,7 +324,7 @@ def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches meta = self.timestamp.signed.meta["snapshot.json"] # Verify against the hashes in timestamp, if any - hashes = meta.get("hashes") or {} + hashes = meta.hashes or {} for algo, stored_hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) @@ 
-350,11 +350,11 @@ def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches if ( new_snapshot.signed.version - != self.timestamp.signed.meta["snapshot.json"]["version"] + != self.timestamp.signed.meta["snapshot.json"].version ): raise exceptions.BadVersionNumberError( f"Expected snapshot version" - f"{self.timestamp.signed.meta['snapshot.json']['version']}," + f"{self.timestamp.signed.meta['snapshot.json'].version}," f"got {new_snapshot.signed.version}" ) @@ -369,10 +369,10 @@ def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches ) # Prevent rollback of any metadata versions - if new_fileinfo["version"] < fileinfo["version"]: + if new_fileinfo.version < fileinfo.version: raise exceptions.BadVersionNumberError( f"Expected {filename} version" - f"{new_fileinfo['version']}, got {fileinfo['version']}" + f"{new_fileinfo.version}, got {fileinfo.version}" ) if new_snapshot.signed.is_expired(self.reference_time): @@ -423,7 +423,7 @@ def update_delegated_targets( f"Snapshot does not contain information for '{role_name}'" ) - hashes = meta.get("hashes") or {} + hashes = meta.hashes or {} for algo, stored_hash in hashes.items(): digest_object = sslib_hash.digest(algo) digest_object.update(data) @@ -448,10 +448,10 @@ def update_delegated_targets( new_delegate, ) - if new_delegate.signed.version != meta["version"]: + if new_delegate.signed.version != meta.version: raise exceptions.BadVersionNumberError( f"Expected {role_name} version" - f"{meta['version']}, got {new_delegate.signed.version}" + f"{meta.version}, got {new_delegate.signed.version}" ) if new_delegate.signed.is_expired(self.reference_time): diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 470c8a14f9..5cb8afe449 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -149,7 +149,7 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: # Try one of the algorithm/digest combos 
for a mismatch. We break # as soon as we find a mismatch. - for algorithm, digest in target["fileinfo"]["hashes"].items(): + for algorithm, digest in target["fileinfo"].hashes.items(): digest_object = None try: digest_object = sslib_hash.digest_filename( @@ -205,10 +205,10 @@ def download_target( full_url = parse.urljoin(target_base_url, targetinfo["filepath"]) with download.download_file( - full_url, targetinfo["fileinfo"]["length"], self._fetcher + full_url, targetinfo["fileinfo"].length, self._fetcher ) as target_file: - _check_file_length(target_file, targetinfo["fileinfo"]["length"]) - _check_hashes_obj(target_file, targetinfo["fileinfo"]["hashes"]) + _check_file_length(target_file, targetinfo["fileinfo"].length) + _check_hashes_obj(target_file, targetinfo["fileinfo"].hashes) filepath = os.path.join( destination_directory, targetinfo["filepath"] @@ -294,10 +294,10 @@ def _load_snapshot(self) -> None: logger.debug("Failed to load local snapshot %s", e) metainfo = self._bundle.timestamp.signed.meta["snapshot.json"] - length = metainfo.get("length") or DEFAULT_SNAPSHOT_MAX_LENGTH + length = metainfo.length or DEFAULT_SNAPSHOT_MAX_LENGTH version = None if self._bundle.root.signed.consistent_snapshot: - version = metainfo["version"] + version = metainfo.version data = self._download_metadata("snapshot", length, version) self._bundle.update_snapshot(data) @@ -314,10 +314,10 @@ def _load_targets(self, role: str, parent_role: str) -> None: logger.debug("Failed to load local %s: %s", role, e) metainfo = self._bundle.snapshot.signed.meta[f"{role}.json"] - length = metainfo.get("length") or DEFAULT_TARGETS_MAX_LENGTH + length = metainfo.length or DEFAULT_TARGETS_MAX_LENGTH version = None if self._bundle.root.signed.consistent_snapshot: - version = metainfo["version"] + version = metainfo.version data = self._download_metadata(role, length, version) self._bundle.update_delegated_targets(data, role, parent_role) From c64a6f92c17034425f86cccc72efac7b230869b2 Mon Sep 17 
00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 12:43:34 +0300 Subject: [PATCH 79/86] Rename client_rework to ngclient The current client and the next-gen client should coexist in the same repository during the ongoing development of the latter. Looking for a name which is client-related, short, meeting PEP8 package names requirements. Currently "ngclient" seems to fit in until a better proposal comes. Rename updater_rework.py to updater.py Signed-off-by: Teodora Sechkova --- tests/test_metadata_bundle.py | 2 +- tests/{test_updater_rework.py => test_updater_ng.py} | 2 +- tox.ini | 8 ++++---- tuf/{client_rework => ngclient}/README.md | 0 tuf/{client_rework => ngclient}/__init__.py | 0 tuf/{client_rework => ngclient}/download.py | 0 tuf/{client_rework => ngclient}/fetcher.py | 0 tuf/{client_rework => ngclient}/metadata_bundle.py | 0 tuf/{client_rework => ngclient}/requests_fetcher.py | 2 +- .../updater_rework.py => ngclient/updater.py} | 4 ++-- 10 files changed, 9 insertions(+), 9 deletions(-) rename tests/{test_updater_rework.py => test_updater_ng.py} (99%) rename tuf/{client_rework => ngclient}/README.md (100%) rename tuf/{client_rework => ngclient}/__init__.py (100%) rename tuf/{client_rework => ngclient}/download.py (100%) rename tuf/{client_rework => ngclient}/fetcher.py (100%) rename tuf/{client_rework => ngclient}/metadata_bundle.py (100%) rename tuf/{client_rework => ngclient}/requests_fetcher.py (99%) rename tuf/{client_rework/updater_rework.py => ngclient/updater.py} (99%) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index e758e6e7bd..8cd60859c1 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -8,7 +8,7 @@ from tuf import exceptions from tuf.api.metadata import Metadata -from tuf.client_rework.metadata_bundle import MetadataBundle +from tuf.ngclient.metadata_bundle import MetadataBundle from tests import utils diff --git a/tests/test_updater_rework.py b/tests/test_updater_ng.py 
similarity index 99% rename from tests/test_updater_rework.py rename to tests/test_updater_ng.py index 3ea9d08622..4ef42349b6 100644 --- a/tests/test_updater_rework.py +++ b/tests/test_updater_ng.py @@ -28,7 +28,7 @@ import tuf.log import tuf.repository_tool as repo_tool import tuf.unittest_toolbox as unittest_toolbox -import tuf.client_rework.updater_rework as updater +import tuf.ngclient.updater as updater from tests import utils from tuf.api import metadata diff --git a/tox.ini b/tox.ini index 063364c46d..f987365c85 100644 --- a/tox.ini +++ b/tox.ini @@ -43,14 +43,14 @@ commands = # Use different configs for new (tuf/api/*) and legacy code # TODO: configure black and isort args in pyproject.toml (see #1161) black --check --diff --line-length 80 {toxinidir}/tuf/api - black --check --diff --line-length 80 {toxinidir}/tuf/client_rework + black --check --diff --line-length 80 {toxinidir}/tuf/ngclient isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/api - isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/client_rework + isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/ngclient pylint {toxinidir}/tuf/api --rcfile={toxinidir}/tuf/api/pylintrc - pylint {toxinidir}/tuf/client_rework --rcfile={toxinidir}/tuf/api/pylintrc + pylint {toxinidir}/tuf/ngclient --rcfile={toxinidir}/tuf/api/pylintrc # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. 
- pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization,{toxinidir}/tuf/client_rework + pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization,{toxinidir}/tuf/ngclient bandit -r {toxinidir}/tuf diff --git a/tuf/client_rework/README.md b/tuf/ngclient/README.md similarity index 100% rename from tuf/client_rework/README.md rename to tuf/ngclient/README.md diff --git a/tuf/client_rework/__init__.py b/tuf/ngclient/__init__.py similarity index 100% rename from tuf/client_rework/__init__.py rename to tuf/ngclient/__init__.py diff --git a/tuf/client_rework/download.py b/tuf/ngclient/download.py similarity index 100% rename from tuf/client_rework/download.py rename to tuf/ngclient/download.py diff --git a/tuf/client_rework/fetcher.py b/tuf/ngclient/fetcher.py similarity index 100% rename from tuf/client_rework/fetcher.py rename to tuf/ngclient/fetcher.py diff --git a/tuf/client_rework/metadata_bundle.py b/tuf/ngclient/metadata_bundle.py similarity index 100% rename from tuf/client_rework/metadata_bundle.py rename to tuf/ngclient/metadata_bundle.py diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/ngclient/requests_fetcher.py similarity index 99% rename from tuf/client_rework/requests_fetcher.py rename to tuf/ngclient/requests_fetcher.py index ef18233024..fb01d5b2f2 100644 --- a/tuf/client_rework/requests_fetcher.py +++ b/tuf/ngclient/requests_fetcher.py @@ -15,7 +15,7 @@ import tuf from tuf import exceptions, settings -from tuf.client_rework.fetcher import FetcherInterface +from tuf.ngclient.fetcher import FetcherInterface # Globals logger = logging.getLogger(__name__) diff --git a/tuf/client_rework/updater_rework.py b/tuf/ngclient/updater.py similarity index 99% rename from tuf/client_rework/updater_rework.py rename to tuf/ngclient/updater.py index 5cb8afe449..0d5caa4278 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/ngclient/updater.py @@ -18,8 +18,8 @@ from securesystemslib import util as 
sslib_util from tuf import exceptions -from tuf.client.fetcher import FetcherInterface -from tuf.client_rework import download, metadata_bundle, requests_fetcher +from tuf.ngclient import download, metadata_bundle, requests_fetcher +from tuf.ngclient.fetcher import FetcherInterface # Globals MAX_ROOT_ROTATIONS = 32 From c29051cc4b34214f042fb05dc7303e6dc1771005 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 13:12:03 +0300 Subject: [PATCH 80/86] Move non-public modules to ngclient/_internal Separate public/private API. Keep modules containing the public classes in the main client directory and move the rest to _internal. Signed-off-by: Teodora Sechkova --- tests/test_metadata_bundle.py | 2 +- tox.ini | 2 +- tuf/ngclient/_internal/__init__.py | 0 tuf/ngclient/{ => _internal}/download.py | 0 tuf/ngclient/{ => _internal}/metadata_bundle.py | 0 tuf/ngclient/{ => _internal}/requests_fetcher.py | 0 tuf/ngclient/updater.py | 2 +- 7 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 tuf/ngclient/_internal/__init__.py rename tuf/ngclient/{ => _internal}/download.py (100%) rename tuf/ngclient/{ => _internal}/metadata_bundle.py (100%) rename tuf/ngclient/{ => _internal}/requests_fetcher.py (100%) diff --git a/tests/test_metadata_bundle.py b/tests/test_metadata_bundle.py index 8cd60859c1..a988b8d3f5 100644 --- a/tests/test_metadata_bundle.py +++ b/tests/test_metadata_bundle.py @@ -8,7 +8,7 @@ from tuf import exceptions from tuf.api.metadata import Metadata -from tuf.ngclient.metadata_bundle import MetadataBundle +from tuf.ngclient._internal.metadata_bundle import MetadataBundle from tests import utils diff --git a/tox.ini b/tox.ini index f987365c85..29b0550107 100644 --- a/tox.ini +++ b/tox.ini @@ -51,6 +51,6 @@ commands = # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. 
- pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization,{toxinidir}/tuf/ngclient + pylint {toxinidir}/tuf --ignore={toxinidir}/tuf/api,{toxinidir}/tuf/api/serialization,{toxinidir}/tuf/ngclient,{toxinidir}/tuf/ngclient/_internal bandit -r {toxinidir}/tuf diff --git a/tuf/ngclient/_internal/__init__.py b/tuf/ngclient/_internal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tuf/ngclient/download.py b/tuf/ngclient/_internal/download.py similarity index 100% rename from tuf/ngclient/download.py rename to tuf/ngclient/_internal/download.py diff --git a/tuf/ngclient/metadata_bundle.py b/tuf/ngclient/_internal/metadata_bundle.py similarity index 100% rename from tuf/ngclient/metadata_bundle.py rename to tuf/ngclient/_internal/metadata_bundle.py diff --git a/tuf/ngclient/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py similarity index 100% rename from tuf/ngclient/requests_fetcher.py rename to tuf/ngclient/_internal/requests_fetcher.py diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 0d5caa4278..551fa67eeb 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -18,7 +18,7 @@ from securesystemslib import util as sslib_util from tuf import exceptions -from tuf.ngclient import download, metadata_bundle, requests_fetcher +from tuf.ngclient._internal import download, metadata_bundle, requests_fetcher from tuf.ngclient.fetcher import FetcherInterface # Globals From 3f1aa10dbdb4cdcc63cda944422ed064268e273b Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 13:48:01 +0300 Subject: [PATCH 81/86] Expose client public classes from __init__.py Only "Updater" and "FetcherInterface" are considered public classes of the client. Exposing them in __init__.py makes usage and access simpler. 
Signed-off-by: Teodora Sechkova --- tests/test_updater_ng.py | 4 ++-- tuf/ngclient/__init__.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py index 4ef42349b6..44dae1d006 100644 --- a/tests/test_updater_ng.py +++ b/tests/test_updater_ng.py @@ -28,10 +28,10 @@ import tuf.log import tuf.repository_tool as repo_tool import tuf.unittest_toolbox as unittest_toolbox -import tuf.ngclient.updater as updater from tests import utils from tuf.api import metadata +from tuf import ngclient import securesystemslib @@ -116,7 +116,7 @@ def setUp(self): targets_url = f"{url_prefix}/targets/" # Creating a repository instance. The test cases will use this client # updater to refresh metadata, fetch target files, etc. - self.repository_updater = updater.Updater(self.client_directory, + self.repository_updater = ngclient.Updater(self.client_directory, metadata_url, targets_url) diff --git a/tuf/ngclient/__init__.py b/tuf/ngclient/__init__.py index e69de29bb2..0a572962ba 100644 --- a/tuf/ngclient/__init__.py +++ b/tuf/ngclient/__init__.py @@ -0,0 +1,8 @@ +# Copyright New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client public API +""" + +from tuf.ngclient.fetcher import FetcherInterface +from tuf.ngclient.updater import Updater From d57d36f4bb283d1ac7ff0be6c627075c3067bcdb Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 14:20:13 +0300 Subject: [PATCH 82/86] Combine paths in tox.ini Use the same call of black, isort, pylint to cover multiple directories. 
Signed-off-by: Teodora Sechkova --- tox.ini | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tox.ini b/tox.ini index 29b0550107..d687a4620b 100644 --- a/tox.ini +++ b/tox.ini @@ -42,12 +42,9 @@ commands = commands = # Use different configs for new (tuf/api/*) and legacy code # TODO: configure black and isort args in pyproject.toml (see #1161) - black --check --diff --line-length 80 {toxinidir}/tuf/api - black --check --diff --line-length 80 {toxinidir}/tuf/ngclient - isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/api - isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/ngclient - pylint {toxinidir}/tuf/api --rcfile={toxinidir}/tuf/api/pylintrc - pylint {toxinidir}/tuf/ngclient --rcfile={toxinidir}/tuf/api/pylintrc + black --check --diff --line-length 80 {toxinidir}/tuf/api {toxinidir}/tuf/ngclient + isort --check --diff --line-length 80 --profile black -p tuf {toxinidir}/tuf/api {toxinidir}/tuf/ngclient + pylint {toxinidir}/tuf/api {toxinidir}/tuf/ngclient --rcfile={toxinidir}/tuf/api/pylintrc # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. From 18c527241cdd8267bb3507bd73ec734ac0fed1a4 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 20 May 2021 14:27:40 +0300 Subject: [PATCH 83/86] Omit ngclient from coverage check Restore coverage back to 97% but omit ngclient from the overall score until tests are implemented. 
Signed-off-by: Teodora Sechkova --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index d687a4620b..e1ab88eef1 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ changedir = tests commands = python --version python -m coverage run aggregate_tests.py - python -m coverage report -m --fail-under 90 + python -m coverage report -m --fail-under 97 --omit "{toxinidir}/tuf/ngclient/*" deps = -r{toxinidir}/requirements-test.txt From 534021bdf118513a540cd1c77d593ffe23e1d72a Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 11:28:46 +0300 Subject: [PATCH 84/86] ngclient: Fix logging to remove pylint disable Remove pylint disable logging-not-lazy, fix remaining non-lazy logging (ngclient/updater.py still contains some but pylint does not notice them: These will be fixed in issue #1400) Signed-off-by: Jussi Kukkonen --- tuf/api/pylintrc | 1 - tuf/ngclient/_internal/download.py | 43 ++++++---------------- tuf/ngclient/_internal/requests_fetcher.py | 16 ++++---- 3 files changed, 18 insertions(+), 42 deletions(-) diff --git a/tuf/api/pylintrc b/tuf/api/pylintrc index b6ce46c993..01139a8804 100644 --- a/tuf/api/pylintrc +++ b/tuf/api/pylintrc @@ -14,7 +14,6 @@ disable=fixme, too-few-public-methods, too-many-arguments, - logging-not-lazy, [BASIC] good-names=i,j,k,v,e,f,fn,fp,_type diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index fc53b70e12..31b59f6630 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -86,7 +86,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): # encoded as %5c in the url, which should also be replaced with a forward # slash. url = parse.unquote(url).replace("\\", "/") - logger.info("Downloading: " + repr(url)) + logger.info("Downloading: %s", url) # This is the temporary file that we will return to contain the contents of # the downloaded file. 
@@ -134,7 +134,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): except Exception: # Close 'temp_file'. Any written data is lost. temp_file.close() - logger.debug("Could not download URL: " + repr(url)) + logger.debug("Could not download URL: %s", url) raise else: @@ -202,44 +202,23 @@ def _check_downloaded_length( """ if total_downloaded == required_length: - logger.info( - "Downloaded " + str(total_downloaded) + " bytes out of the" - " expected " + str(required_length) + " bytes." - ) + logger.info("Downloaded %d bytes as expected.", total_downloaded) else: - difference_in_bytes = abs(total_downloaded - required_length) - # What we downloaded is not equal to the required length, but did we ask # for strict checking of required length? if strict_required_length: logger.info( - "Downloaded " + str(total_downloaded) + " bytes, but" - " expected " - + str(required_length) - + " bytes. There is a difference" - " of " + str(difference_in_bytes) + " bytes." + "Downloaded %d bytes, but expected %d bytes", + total_downloaded, + required_length, ) # If the average download speed is below a certain threshold, we # flag this as a possible slow-retrieval attack. 
- logger.debug( - "Average download speed: " + repr(average_download_speed) - ) - logger.debug( - "Minimum average download speed: " - + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED) - ) - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: raise exceptions.SlowRetrievalError(average_download_speed) - logger.debug( - "Good average download speed: " - + repr(average_download_speed) - + " bytes per second" - ) - raise exceptions.DownloadLengthMismatchError( required_length, total_downloaded ) @@ -253,12 +232,12 @@ def _check_downloaded_length( raise exceptions.SlowRetrievalError(average_download_speed) logger.debug( - "Good average download speed: " - + repr(average_download_speed) - + " bytes per second" + "Good average download speed: %f bytes per second", + average_download_speed, ) logger.info( - "Downloaded " + str(total_downloaded) + " bytes out of an" - " upper limit of " + str(required_length) + " bytes." + "Downloaded %d bytes out of upper limit of %d bytes.", + total_downloaded, + required_length, ) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py index fb01d5b2f2..6913b27edd 100644 --- a/tuf/ngclient/_internal/requests_fetcher.py +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -118,11 +118,9 @@ def chunks(): # downloaded. if not data: logger.debug( - "Downloaded " - + repr(bytes_received) - + "/" - + repr(required_length) - + " bytes." + "Downloaded %d out of %d bytes", + bytes_received, + required_length, ) # Finally, we signal that the download is complete. 
@@ -156,8 +154,8 @@ def _get_session(self, url): session_index = parsed_url.scheme + "+" + parsed_url.hostname - logger.debug("url: " + url) - logger.debug("session index: " + session_index) + logger.debug("url: %s", url) + logger.debug("session index: %s", session_index) session = self._sessions.get(session_index) @@ -181,9 +179,9 @@ def _get_session(self, url): } ) - logger.debug("Made new session for " + session_index) + logger.debug("Made new session %s", session_index) else: - logger.debug("Reusing session for " + session_index) + logger.debug("Reusing session %s", session_index) return session From 000a1846161b5d2cdbcbcef99a94ce59a1c29c6f Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 21 May 2021 09:33:09 +0300 Subject: [PATCH 85/86] ngclient: Update README Fixes #1404 Signed-off-by: Jussi Kukkonen --- tuf/ngclient/README.md | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/tuf/ngclient/README.md b/tuf/ngclient/README.md index aa05e534c8..ad1de19b78 100644 --- a/tuf/ngclient/README.md +++ b/tuf/ngclient/README.md @@ -1,9 +1,22 @@ -# updater.py -**updater.py** is intended as the only TUF module that software update -systems need to utilize for a low-level integration. It provides a single -class representing an updater that includes methods to download, install, and -verify metadata or target files in a secure manner. Importing -**tuf.client.updater** and instantiating its main class is all that is -required by the client prior to a TUF update request. The importation and -instantiation steps allow TUF to load all of the required metadata files -and set the repository mirror information. +## Next-gen TUF client for Python + +This package provides modules for TUF client implementers. 
+ +**tuf.ngclient.Updater** is a class that implements the client workflow +described in the TUF specification (see +https://theupdateframework.github.io/specification/latest/#detailed-client-workflow) + +**tuf.ngclient.FetcherInterface** is an abstract class that client +implementers can optionally use to integrate with their own +network/download infrastructure -- a Requests-based implementation is +used by default. + +This package: +* Aims to be a clean, easy-to-validate reference client implementation + written in modern Python +* At the same time aims to be the library choice for anyone + implementing a TUF client in Python: light-weight, easy to integrate + and with minimal required dependencies +* Is still under development but planned to become the default client + in this code base (as in the older tuf.client will be deprecated in + the future) From e54ed249f52e79d2e1fcada60b46c7d1cf98aaa7 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Fri, 21 May 2021 13:04:23 +0300 Subject: [PATCH 86/86] Add docstrings in Updater Add some missing docstrings in updater.py Signed-off-by: Teodora Sechkova --- tuf/ngclient/updater.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 551fa67eeb..27ac74617c 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -1,10 +1,7 @@ # Copyright 2020, New York University and the TUF contributors # SPDX-License-Identifier: MIT OR Apache-2.0 -"""TUF client 1.0.0 draft - -TODO - +"""TUF client workflow implementation. """ import fnmatch @@ -34,9 +31,8 @@ # Classes class Updater: """ - Provides a class that can download target files securely. - - TODO + An implementation of the TUF client workflow. + Provides a public API for integration in client applications. 
""" def __init__( @@ -218,7 +214,7 @@ def download_target( def _download_metadata( self, rolename: str, length: int, version: Optional[int] = None ) -> bytes: - """download a metadata file and return it as bytes""" + """Download a metadata file and return it as bytes""" if version is None: filename = f"{rolename}.json" else: @@ -325,7 +321,9 @@ def _load_targets(self, role: str, parent_role: str) -> None: def _preorder_depth_first_walk(self, target_filepath) -> Dict: """ - TODO + Interrogates the tree of target delegations in order of appearance + (which implicitly order trustworthiness), and returns the matching + target found in the most trusted role. """ target = None @@ -518,7 +516,12 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: def _check_file_length(file_object, trusted_file_length): """ - TODO + Given a file_object, checks whether its length matches + trusted_file_length. + + Raises: + DownloadLengthMismatchError: File length does not match + expected length. """ file_object.seek(0, 2) observed_length = file_object.tell() @@ -532,7 +535,11 @@ def _check_file_length(file_object, trusted_file_length): def _check_hashes_obj(file_object, trusted_hashes): """ - TODO + Given a file_object, checks whether its hash matches + trusted_hashes. + + Raises: + BadHashError: Hashes do not match """ for algorithm, trusted_hash in trusted_hashes.items(): digest_object = sslib_hash.digest_fileobject(file_object, algorithm) @@ -550,10 +557,10 @@ def _check_hashes_obj(file_object, trusted_hashes): def _get_filepath_hash(target_filepath, hash_function="sha256"): """ - TODO + Calculate the hash of the filepath to determine which bin to find the + target. """ - # Calculate the hash of the filepath to determine which bin to find the - # target. The client currently assumes the repository (i.e., repository + # The client currently assumes the repository (i.e., repository # tool) uses 'hash_function' to generate hashes and UTF-8. 
digest_object = sslib_hash.digest(hash_function) encoded_target_filepath = target_filepath.encode("utf-8")