#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Download profdata from different arches, merge them and upload to gs.
|
|
|
|
The script is used for updating the PGO profiles for LLVM. The workflow
|
|
is that the script will download profdata from different PGO builds, merge
|
|
them and then upload it to a gs location that LLVM can access.
|
|
|
|
The simplest way of using this script, is to run:
|
|
./merge_profdata_and_upload.py --all_latest_profiles
|
|
which will automatically grab profdata from latest PGO generate builders
|
|
for three different architectures and merge them. LLVM hash is also
|
|
detected automatically from the artifacts.
|
|
|
|
If you want to specify certain llvm hash, run it with:
|
|
./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH
|
|
Note that hash checking will fail if the llvm hash you provided is not the
|
|
same as those in artifacts, or llvm hash in different artifacts are not the
|
|
same.
|
|
|
|
To only use profiles from buildbucket tasks for PGO generate, run it with:
|
|
./merge_profdata_and_upload.py -b amd64/bb_id1 -b arm/bb_id2 ...
|
|
The buildbucket id can be found using `bb ls` command after manually launched
|
|
builder finishes.
|
|
|
|
There is a chance that builders only succeeded partially, in this case, you
|
|
can run this script to merge both profdata from builder scheduled and manually
|
|
launched:
|
|
./merge_profdata_and_upload.py -l arm -l amd64 -b arm64/bb_id
|
|
In this example, the script will merge profdata from arm and amd64 builder, and
|
|
profdata from an arm64 buildbucket task.
|
|
"""
|
|
|
|
|
|
import argparse
import collections
import distutils.spawn
import json
import os
import os.path
import shutil
import subprocess
import sys
import tempfile


_LLVM_PROFDATA = "/usr/bin/llvm-profdata"
_GS_PREFIX = "gs://"

_LLVMMetadata = collections.namedtuple("_LLVMMetadata", ["head_sha"])


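# Note: this script shells out to `gsutil` and `tar`, and runs llvm-profdata
# from the absolute path above, so it is meant to be run inside the chroot
# with those tools available.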
def _fetch_gs_artifact(remote_name, local_name):
    """Fetch a single file from a remote gs location to local.

    Args:
        remote_name: full gs location of the file.
        local_name: the name of the local file to copy to.
    """
    assert remote_name.startswith(_GS_PREFIX)
    subprocess.check_call(["gsutil", "cp", remote_name, local_name])


def _get_gs_profdata(remote_profdata, arch):
    """Fetch and extract profdata from a remote gs location.

    Args:
        remote_profdata: remote gs location of the profdata tarball.
        arch: directory named with the arch, to separate each profdata.

    Returns:
        Local location of the extracted profdata.
    """
    tar = "llvm_profdata.tar.xz"
    _fetch_gs_artifact(remote_profdata, tar)
    extract_cmd = ["tar", "-xvf", tar]

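    # check_output() returns bytes, hence the bytes literal in the check
    # below.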
    profdata_name = subprocess.check_output(extract_cmd).strip()
    # The output of the `tar` command should only contain one line: the
    # name of the extracted profdata.
    if b".llvm.profdata" not in profdata_name:
        raise RuntimeError("No profdata in the tarball: %s" % remote_profdata)

    os.mkdir(arch)
    profdata_loc = os.path.join(arch, "llvm.profdata")
    os.rename(profdata_name, profdata_loc)
    print("Profdata extracted to: %s" % profdata_loc)
    return profdata_loc


def _get_gs_metadata(remote_metadata):
    """Fetch metadata from a remote gs location and read the LLVM head_sha.

    Args:
        remote_metadata: remote gs location of the metadata json file.

    Returns:
        An _LLVMMetadata namedtuple holding the LLVM head_sha.
    """
    metadata_basename = "llvm_metadata.json"
    _fetch_gs_artifact(remote_metadata, metadata_basename)

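    # The metadata file is a small JSON object; only its "head_sha" field is
    # used, e.g. {"head_sha": "a8e5dcb072b1f794883ae8125fb08c06db678d56"}.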
    with open(metadata_basename) as f:
        result = json.load(f)

    return _LLVMMetadata(head_sha=result["head_sha"])


def _find_latest_artifacts(gs_url, arch):
    """Fetch the latest profdata and metadata from a given gs location.

    Args:
        gs_url: a gs location containing one or more artifacts to fetch.
        arch: the arch the profdata was collected from.

    Returns:
        A tuple of the metadata and the local profdata location.
    """
    assert gs_url.startswith(_GS_PREFIX)
    try:
        # List all artifacts in the gs location and sort by time.
        output = (
            subprocess.check_output(
                ["gsutil", "ls", "-l", gs_url], encoding="utf-8"
            )
            .strip()
            .split("\n")
        )
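        # In `gsutil ls -l` output, the second column is the creation
        # timestamp, so this puts the newest artifacts first.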
        lines = sorted(output, key=lambda x: x.split()[1], reverse=True)
    except subprocess.CalledProcessError:
        raise RuntimeError("Artifacts not found: %s" % gs_url)

    # Loop through all artifacts to find the latest profdata.
    # An example of the output for the latest builder bucket:
    # pylint: disable=line-too-long
    # 5006528 2020-05-31T10:08:48Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    # 56 2020-05-31T10:08:48Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    # 5005952 2020-05-24T10:53:34Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    # 56 2020-05-24T10:53:34Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    # An example of the lines for a buildbucket location:
    # 5004260 2020-05-29T09:48:04Z gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    # 56 2020-05-29T09:48:04Z gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    # pylint: enable=line-too-long
    profile_path = None
    profdata_url = ""
    for line in lines:
        url = line.split()[-1]
        if ".llvm.profdata.tar.xz" in url:
            profile_path = _get_gs_profdata(url, arch)
            profdata_url = url
            break
    if not profile_path or not profdata_url:
        raise RuntimeError("No profdata found from %s" % gs_url)

    metadata_url = profdata_url.replace(
        ".llvm.profdata.tar.xz", ".llvm_metadata.json"
    )
    metadata = _get_gs_metadata(metadata_url)
    if not metadata:
        raise RuntimeError("No metadata found from %s" % gs_url)
    return metadata, profile_path


def _fetch_from_latest(arch):
    """Fetch artifacts from the latest builders.

    Args:
        arch: the arch the profdata was collected from.

    Returns:
        A tuple of the metadata and the local profdata location.
    """
    print("\nFETCHING LATEST PROFDATA ON %s..." % arch.upper())
    remote_latest = "%schromeos-toolchain-artifacts/llvm-pgo/%s" % (
        _GS_PREFIX,
        arch,
    )
    return _find_latest_artifacts(remote_latest, arch)


def _fetch_from_buildbucket(arch, bb):
    """Fetch artifacts from a buildbucket task.

    Args:
        arch: the arch the profdata was collected from.
        bb: buildbucket id.

    Returns:
        A tuple of the metadata and the local profdata location.
    """
    print("\nFETCHING BUILDBUCKET PROFDATA ON %s..." % arch.upper())
    remote_arch = (
        "%schromeos-image-archive/%s-pgo-generate-llvm-next-toolchain"
        % (
            _GS_PREFIX,
            arch,
        )
    )
    # List all buckets under {arch}-pgo-generate-llvm-next-toolchain and
    # grep for the buildbucket id.
    remote_bb = (
        subprocess.check_output(["gsutil", "ls", remote_arch], encoding="utf-8")
        .strip()
        .split("\n")
    )
    for line in remote_bb:
        if bb in line:
            return _find_latest_artifacts(line, arch)
    raise RuntimeError(
        "No matched results found in %s with bb: %s" % (arch, bb)
    )


def _merge_profdata(profdata_list, output_name):
    """Merge profdata.

    Args:
        profdata_list: list of profdata locations, one per arch.
        output_name: name of the merged profdata.
    """
    merge_cmd = [
        _LLVM_PROFDATA,
        "merge",
        "-output",
        output_name,
    ] + profdata_list
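    # The resulting command looks like, e.g.:
    #   /usr/bin/llvm-profdata merge -output llvm.profdata \
    #       arm/llvm.profdata arm64/llvm.profdata amd64/llvm.profdata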
print("\nMerging PGO profiles.\nCMD: %s" % merge_cmd)
|
|
subprocess.check_call(merge_cmd)
|
|
|
|
|
|
def _tar_and_upload_profdata(profdata, name_suffix):
|
|
"""Create a tarball of merged profdata and upload to certain gs location.
|
|
|
|
Args:
|
|
profdata: location of merged profdata.
|
|
name_suffix: usually the LLVM head_sha.
|
|
"""
|
|
tarball = "llvm-profdata-%s.tar.xz" % name_suffix
|
|
print("Making profdata tarball: %s" % tarball)
|
|
subprocess.check_call(
|
|
["tar", "--sparse", "-I", "xz", "-cf", tarball, profdata]
|
|
)
|
|
|
|
upload_location = "%schromeos-localmirror/distfiles/%s" % (
|
|
_GS_PREFIX,
|
|
tarball,
|
|
)
|
|
|
|
# TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but
|
|
# now llvm could only recognize distfiles.
|
|
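    # `cp -n` is no-clobber: instead of overwriting an existing object,
    # gsutil skips it and says so, which is checked for below.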
    upload_cmd = [
        "gsutil",
        "-m",
        "cp",
        "-n",
        "-a",
        "public-read",
        tarball,
        upload_location,
    ]
    print("\nUploading tarball to gs.\nCMD: %s\n" % upload_cmd)

    # gsutil prints all status to stderr, oddly enough.
    gs_output = subprocess.check_output(
        upload_cmd, stderr=subprocess.STDOUT, encoding="utf-8"
    )

    # gsutil exits successfully even if it uploaded nothing. It prints a
    # summary of what it did, though. Successful uploads are just a progress
    # bar; unsuccessful ones note that items were skipped.
    if "Skipping existing item" in gs_output:
        raise ValueError(
            "Profile upload failed: would overwrite an existing "
            "profile at %s" % upload_location
        )


def main():
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-a",
        "--all_latest_profiles",
        action="store_true",
        help="Merge and upload profiles from the latest builders.",
    )
    parser.add_argument(
        "-l",
        "--latest",
        default=[],
        action="append",
        help="Download profdata from the latest builder for the given "
        "architecture; can be passed multiple times. By default, profdata "
        "from arm, arm64, and amd64 is merged.",
    )
    parser.add_argument(
        "-b",
        "--buildbucket",
        default=[],
        action="append",
        help="Extra pgo-generate-llvm-next-toolchain buildbucket results to "
        "be used. Format should be: {arch}/{bb_id}.",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="llvm.profdata",
        help="Where to put the merged PGO profile. By default it is written "
        "to a temporary directory that is removed on success, so it is "
        "effectively not saved anywhere.",
    )
    parser.add_argument(
        "--llvm_hash",
        help="The LLVM hash to select for the profiles. Generally "
        "autodetected.",
    )
    args = parser.parse_args()

    if not args.all_latest_profiles and not (args.latest or args.buildbucket):
        parser.error(
            "Please specify whether to use the latest profiles or "
            "profiles from buildbucket."
        )

    if args.all_latest_profiles and (args.latest or args.buildbucket):
        parser.error(
            "--all_latest_profiles cannot be specified together "
            "with --latest or --buildbucket."
        )

    latest = (
        ["arm", "arm64", "amd64"] if args.all_latest_profiles else args.latest
    )

    all_arch_list = latest.copy()
    arch_bb_list = []
    if args.buildbucket:
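        # Each --buildbucket value is "{arch}/{bb_id}",
        # e.g. "arm64/8879010326583123168".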
        for arch_bb in args.buildbucket:
            arch, bb = arch_bb.split("/")
            arch_bb_list.append((arch, bb))
            all_arch_list.append(arch)

    if len(set(all_arch_list)) != len(all_arch_list):
        parser.error("Each arch can only be passed once.")

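    # Bail out early if llvm-profdata is missing (i.e., we are likely not in
    # the chroot).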
    if not distutils.spawn.find_executable(_LLVM_PROFDATA):
        sys.exit(_LLVM_PROFDATA + " not found; are you in the chroot?")

    initial_dir = os.getcwd()
    temp_dir = tempfile.mkdtemp(prefix="merge_pgo")
    success = True
    try:
        os.chdir(temp_dir)
        profdata_list = []
        heads = set()

        def append_artifacts(fetched_tuple):
            llvm_metadata, profdata_loc = fetched_tuple
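            # Real PGO profiles are several MiB (see the example sizes in
            # _find_latest_artifacts), so anything under 512 KiB is almost
            # certainly truncated or otherwise broken.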
            if os.path.getsize(profdata_loc) < 512 * 1024:
                raise RuntimeError(
                    "The PGO profile in local path %s is suspiciously "
                    "small. Something might have gone wrong." % profdata_loc
                )
            heads.add(llvm_metadata.head_sha)
            profdata_list.append(profdata_loc)

        for arch in latest:
            append_artifacts(_fetch_from_latest(arch))

        for arch, bb in arch_bb_list:
            append_artifacts(_fetch_from_buildbucket(arch, bb))

        assert heads, "Didn't fetch anything?"

        def die_with_head_complaint(complaint):
            extra = " (HEADs found: %s)" % sorted(heads)
            raise RuntimeError(complaint.rstrip() + extra)

        llvm_hash = args.llvm_hash
        if not llvm_hash:
            if len(heads) != 1:
                die_with_head_complaint(
                    "%d LLVM HEADs were found, which is more than one. You "
                    "probably want a consistent set of HEADs for a profile. "
                    "If you know you don't, please specify --llvm_hash, and "
                    "note that *all* profiles will be merged into this "
                    "final profile, regardless of their reported HEAD."
                    % len(heads)
                )
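            # heads is guaranteed to have exactly one element here, so
            # unpack it as the hash to use.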
            (llvm_hash,) = heads

        if llvm_hash not in heads:
            assert llvm_hash == args.llvm_hash
            die_with_head_complaint(
                "HEAD %s wasn't found in any fetched artifacts." % llvm_hash
            )

        print("\nUsing LLVM hash: %s" % llvm_hash)

        _merge_profdata(profdata_list, args.output)
        print(
            "Merged profdata is located at %s" % os.path.abspath(args.output)
        )
        _tar_and_upload_profdata(args.output, name_suffix=llvm_hash)
        print("\nMerged profdata uploaded successfully.")
    except:
        success = False
        raise
    finally:
        os.chdir(initial_dir)
        if success:
            print("Clearing temp directory.")
            shutil.rmtree(temp_dir, ignore_errors=True)
        else:
            print("Script failed; temp directory is at: %s" % temp_dir)


if __name__ == "__main__":
    sys.exit(main())