Source code for llvm_ir_dataset_utils.tools.build_crate_from_repository
"""Tool to build a crate given just a repository."""
import json
import logging
from absl import app
from absl import flags
import ray
from llvm_ir_dataset_utils.builders import builder
FLAGS = flags.FLAGS

# Input selection. Exactly one of --repository / --repository_list must be
# given; this is enforced by the multi-flag validator registered in this file.
flags.DEFINE_string('repository', None, 'The repository url to clone from.')
# main() parses the file named by this flag as JSON.
flags.DEFINE_string('repository_list', None,
                    'Path to a file containing a list of repositories.')

# Working directories handed to the builder for every crate.
flags.DEFINE_string('source_dir', None,
                    'The directory to download source code into.')
flags.DEFINE_string('build_dir', None,
                    'The base directory to perform builds in.')
flags.DEFINE_string('corpus_dir', None, 'The directory to place the corpus in.')

flags.DEFINE_integer('thread_count', 8,
                     'The number of threads to use per crate build.')

# Rust toolchain locations; main() exports these to each build as the
# CARGO_HOME and RUSTUP_HOME environment variables.
flags.DEFINE_string('cargo_home', '/cargo', 'The default cargo directory.')
flags.DEFINE_string('rustup_home', '/rustup',
                    'The default rustup home directory.')

flags.DEFINE_bool(
    'archive_corpus', False,
    'Whether or not to put the output corpus for each package into an archive.')

flags.mark_flag_as_required('source_dir')
flags.mark_flag_as_required('build_dir')
flags.mark_flag_as_required('corpus_dir')
@flags.multi_flags_validator(
    ['repository', 'repository_list'],
    message=(
        'Expected one and only one of --repository and --repository_list to be '
        'defined.'),
)
def _validate_input_columns(flags_dict):
  """Checks that exactly one of the two input flags was supplied.

  Args:
    flags_dict: Mapping from flag name to flag value; read for the keys
      'repository' and 'repository_list'.

  Returns:
    True if exactly one of the two values is not None, False if both or
    neither are set.
  """
  repository_defined = flags_dict['repository'] is not None
  repository_list_defined = flags_dict['repository_list'] is not None
  # Exclusive-or: valid only when the two "is defined" states differ.
  return repository_defined != repository_list_defined
def _make_corpus_description(index, crate_to_build):
  """Builds the corpus description dict for a single crate record.

  Args:
    index: Position of the crate in the list being built; used to derive the
      per-crate folder name.
    crate_to_build: Dict with the keys 'repository', 'tar_archive', 'license'
      and 'license_source'. 'repository' and 'tar_archive' may be None, in
      which case the corresponding source is omitted.

  Returns:
    A dict with 'sources', 'folder_name', 'build_system', 'license' and
    'license_source' entries.
  """
  sources = []
  if crate_to_build['repository'] is not None:
    sources.append({
        'type': 'git',
        'repo_url': crate_to_build['repository'],
        'commit_sha': ''
    })
  if crate_to_build['tar_archive'] is not None:
    sources.append({
        'type': 'tar',
        'archive_url': crate_to_build['tar_archive']
    })
  return {
      'sources': sources,
      'folder_name': f'build-{index}',
      'build_system': 'cargo',
      'license': crate_to_build['license'],
      'license_source': crate_to_build['license_source']
  }


def main(_):
  """Schedules a build for every requested crate and waits for completion.

  The crates come either from --repository (a single URL) or from the JSON
  file named by --repository_list. One build future is created per crate via
  builder.get_build_future, and progress is logged as futures finish.

  Args:
    _: Unused positional argument supplied by the caller (app.run).
  """
  ray.init()

  crates_list = []
  if FLAGS.repository is not None:
    # Wrap the bare URL in the same record shape used for --repository_list
    # entries; appending the raw string made the key lookups below fail with
    # a TypeError.
    # NOTE(review): no license information is available from the command
    # line, so None is passed through -- confirm the builder accepts that.
    crates_list.append({
        'repository': FLAGS.repository,
        'tar_archive': None,
        'license': None,
        'license_source': None,
    })
  elif FLAGS.repository_list is not None:
    with open(FLAGS.repository_list) as repository_list_file:
      crates_list = json.load(repository_list_file)

  build_futures = []
  for index, crate_to_build in enumerate(crates_list):
    corpus_description = _make_corpus_description(index, crate_to_build)

    # Point the build at the configured Rust toolchain directories.
    additional_build_env_variables = {
        'RUSTUP_HOME': FLAGS.rustup_home,
        'CARGO_HOME': FLAGS.cargo_home
    }

    build_futures.append(
        builder.get_build_future(
            corpus_description,
            FLAGS.source_dir,
            FLAGS.build_dir,
            FLAGS.corpus_dir,
            FLAGS.thread_count,
            additional_build_env_variables,
            cleanup=True,
            archive_corpus=FLAGS.archive_corpus))

  all_finished = []
  while build_futures:
    # ray.wait returns (finished, still pending); keep polling the pending
    # futures, with a 5 second timeout per call, until none remain.
    finished, build_futures = ray.wait(build_futures, timeout=5.0)
    finished_data = ray.get(finished)
    all_finished.extend(finished_data)
    logging.info('Just finished %d, %d done, %d remaining',
                 len(finished_data), len(all_finished), len(build_futures))
# Script entry point: hand main to app.run only when executed directly.
if __name__ == '__main__':
  app.run(main)