You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
403 lines
15 KiB
403 lines
15 KiB
# -*- coding: utf-8 -*-
|
|
# Copyright 2011 The ChromiumOS Authors
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""The experiment runner module."""
|
|
|
|
import getpass
|
|
import os
|
|
import shutil
|
|
import time
|
|
|
|
from cros_utils import command_executer
|
|
from cros_utils import logger
|
|
from cros_utils.email_sender import EmailSender
|
|
from cros_utils.file_utils import FileUtils
|
|
from experiment_status import ExperimentStatus
|
|
import lock_machine
|
|
from results_cache import CacheConditions
|
|
from results_cache import ResultsCache
|
|
from results_report import HTMLResultsReport
|
|
from results_report import JSONResultsReport
|
|
from results_report import TextResultsReport
|
|
from schedv2 import Schedv2
|
|
import test_flag
|
|
|
|
import config
|
|
|
|
|
|
def _WriteJSONReportToFile(experiment, results_dir, json_report):
    """Save the text of a JSON report into a file under results_dir.

    The file is named report_<board>_<date>_<time>.<compiler>.json where:
      - board is the board of the experiment's first label,
      - date and time come from json_report (colons in the time are
        replaced by dots),
      - compiler is "llvm" if any label's compiler contains "llvm",
        otherwise "gcc".

    Args:
        experiment: Object whose labels expose `compiler` and `board`.
        results_dir: Directory in which the report file is created.
        json_report: Report object providing `date`, `time` and GetReport().
    """
    compiler_string = "gcc"
    for label in experiment.labels:
        if "llvm" in label.compiler:
            compiler_string = "llvm"
            break

    name_parts = (
        experiment.labels[0].board,
        json_report.date,
        json_report.time.replace(":", "."),
        compiler_string,
    )
    fullname = os.path.join(
        results_dir, "report_%s_%s_%s.%s.json" % name_parts
    )

    # Render the report before opening the file, so a failure in
    # GetReport() does not leave an empty file behind.
    report_text = json_report.GetReport()
    with open(fullname, "w") as out_file:
        out_file.write(report_text)
|
|
|
|
|
|
class ExperimentRunner(object):
    """Runs an experiment, then prints, stores and emails its results.

    Run() is the entry point. It locks the requested machines (unless the
    experiment opts out), starts the experiment, logs status while it is
    in progress, and finally writes the reports into the experiment's
    results directory.
    """

    # Seconds between two status reports while waiting for the experiment.
    # __init__ overrides this per instance with 10 when the experiment's
    # log level is not "verbose".
    STATUS_TIME_DELAY = 30
    # Seconds slept between two polls of experiment.IsComplete().
    THREAD_MONITOR_DELAY = 2

    # Status codes returned by _StoreResults() and Run().
    SUCCEEDED = 0
    HAS_FAILURE = 1
    ALL_FAILED = 2

    def __init__(
        self,
        experiment,
        json_report,
        using_schedv2=False,
        log=None,
        cmd_exec=None,
    ):
        """Initialize the runner.

        Args:
            experiment: The experiment to run.
            json_report: If truthy, _StoreResults also writes a JSON report.
            using_schedv2: If True, a Schedv2 scheduler is attached to the
                experiment before it runs.
            log: Logger to use; defaults to logger.GetLogger() on the
                experiment's log directory.
            cmd_exec: Command executer to use; defaults to one obtained from
                command_executer.GetCommandExecuter().
        """
        self._experiment = experiment
        self.l = log or logger.GetLogger(experiment.log_dir)
        self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
        self._terminated = False
        self.json_report = json_report
        self.locked_machines = []
        if experiment.log_level != "verbose":
            self.STATUS_TIME_DELAY = 10

        # Setting this to True will use crosperf sched v2 (feature in progress).
        self._using_schedv2 = using_schedv2

    def _GetMachineList(self):
        """Return a list of all requested machines.

        Every label-specific request must already be part of the global
        request list (experiment.remote); this is asserted.

        Returns:
            experiment.remote itself (not a copy).
        """
        machines = self._experiment.remote
        # All Label.remote is a sublist of experiment.remote.
        for label in self._experiment.labels:
            for r in label.remote:
                assert r in machines
        return machines

    def _UpdateMachineList(self, locked_machines):
        """Update machines lists to contain only locked machines.

        Go through all the lists of requested machines, both global and
        label-specific requests, and remove any machine that we were not
        able to lock.

        Args:
            locked_machines: A list of the machines we successfully locked.
        """
        # Filter with slice assignment instead of calling remove() while
        # iterating: removing during iteration skips the element that
        # follows each removed one, leaving unlocked machines behind.
        # Slice assignment also keeps the same list objects alive for any
        # other holder of a reference to them.
        remote = self._experiment.remote
        remote[:] = [m for m in remote if m in locked_machines]

        for label in self._experiment.labels:
            label.remote[:] = [m for m in label.remote if m in locked_machines]

    def _GetMachineType(self, lock_mgr, machine):
        """Get where the machine comes from.

        Args:
            lock_mgr: Lock manager used to check crosfleet membership.
            machine: Name or address of the machine.

        Returns:
            The location of the machine: "local" or "crosfleet".

        Raises:
            RuntimeError: The name contains "chromeos" but the lock manager
                reports that the machine is not in crosfleet.
        """
        # We assume that lab machine always starts with chromeos*, and local
        # machines are ip address.
        if "chromeos" not in machine:
            return "local"
        if lock_mgr.CheckMachineInCrosfleet(machine):
            return "crosfleet"
        raise RuntimeError("Lab machine not in Crosfleet.")

    def _LockAllMachines(self, experiment):
        """Attempt to globally lock all of the machines requested for run.

        This method tries to lock all machines requested for this crosperf
        run, picking the mode per machine automatically, to prevent any
        other crosperf runs from being able to update/use the machines while
        this experiment is running:
          - Crosfleet machines: Use crosfleet lease-dut mechanism to lease
          - Local machines: Use file lock mechanism to lock

        In test mode nothing is locked; every requested machine is simply
        recorded as locked.

        Args:
            experiment: The experiment whose machines are to be locked.

        Raises:
            RuntimeError: No machine could be locked.
        """
        if test_flag.GetTestMode():
            self.locked_machines = self._GetMachineList()
            experiment.locked_machines = self.locked_machines
        else:
            experiment.lock_mgr = lock_machine.LockManager(
                self._GetMachineList(),
                "",
                experiment.labels[0].chromeos_root,
                experiment.locks_dir,
                log=self.l,
            )
            for m in experiment.lock_mgr.machines:
                machine_type = self._GetMachineType(experiment.lock_mgr, m)
                if machine_type == "local":
                    experiment.lock_mgr.AddMachineToLocal(m)
                elif machine_type == "crosfleet":
                    experiment.lock_mgr.AddMachineToCrosfleet(m)
            machine_states = experiment.lock_mgr.GetMachineStates("lock")
            experiment.lock_mgr.CheckMachineLocks(machine_states, "lock")
            self.locked_machines = experiment.lock_mgr.UpdateMachines(True)
            experiment.locked_machines = self.locked_machines
            # Drop every machine we failed to lock from all request lists.
            self._UpdateMachineList(self.locked_machines)
            experiment.machine_manager.RemoveNonLockedMachines(
                self.locked_machines
            )
            if not self.locked_machines:
                raise RuntimeError("Unable to lock any machines.")

    def _ClearCacheEntries(self, experiment):
        """Delete the existing cache directory of every benchmark run.

        Args:
            experiment: The experiment whose benchmark runs are inspected.
        """
        for br in experiment.benchmark_runs:
            cache = ResultsCache()
            cache.Init(
                br.label.chromeos_image,
                br.label.chromeos_root,
                br.benchmark.test_name,
                br.iteration,
                br.test_args,
                br.profiler_args,
                br.machine_manager,
                br.machine,
                br.label.board,
                br.cache_conditions,
                br.logger(),
                br.log_level,
                br.label,
                br.share_cache,
                br.benchmark.suite,
                br.benchmark.show_all_results,
                br.benchmark.run_local,
                br.benchmark.cwp_dso,
            )
            cache_dir = cache.GetCacheDirForWrite()
            if os.path.exists(cache_dir):
                self.l.LogOutput("Removing cache dir: %s" % cache_dir)
                shutil.rmtree(cache_dir)

    def _Run(self, experiment):
        """Lock machines, start the experiment and wait for it to complete.

        While waiting, a status report is logged at most once every
        STATUS_TIME_DELAY seconds. On KeyboardInterrupt or SystemExit the
        experiment is terminated and the exception is re-raised.
        experiment.Cleanup() is always called on the way out.

        Args:
            experiment: The experiment to run.
        """
        try:
            # We should not lease machines if tests are launched via `crosfleet
            # create-test`. This is because leasing DUT in crosfleet will create a
            # no-op task on the DUT and new test created will be hanging there.
            # TODO(zhizhouy): Need to check whether machine is ready or not before
            # assigning a test to it.
            if not experiment.no_lock and not experiment.crosfleet:
                self._LockAllMachines(experiment)
            # Calculate all checksums of available/locked machines, to ensure
            # same label has same machines for testing
            experiment.SetCheckSums(forceSameImage=True)
            if self._using_schedv2:
                schedv2 = Schedv2(experiment)
                experiment.set_schedv2(schedv2)
            if CacheConditions.FALSE in experiment.cache_conditions:
                self._ClearCacheEntries(experiment)
            status = ExperimentStatus(experiment)
            experiment.Run()
            last_status_time = 0
            last_status_string = ""
            try:
                if experiment.log_level != "verbose":
                    self.l.LogStartDots()
                while not experiment.IsComplete():
                    if last_status_time + self.STATUS_TIME_DELAY < time.time():
                        last_status_time = time.time()
                        border = "=============================="
                        if experiment.log_level == "verbose":
                            self.l.LogOutput(border)
                            self.l.LogOutput(status.GetProgressString())
                            self.l.LogOutput(status.GetStatusString())
                            self.l.LogOutput(border)
                        else:
                            # Non-verbose: print the status only when it
                            # changed, otherwise just append a progress dot.
                            current_status_string = status.GetStatusString()
                            if current_status_string != last_status_string:
                                self.l.LogEndDots()
                                self.l.LogOutput(border)
                                self.l.LogOutput(current_status_string)
                                self.l.LogOutput(border)
                                last_status_string = current_status_string
                            else:
                                self.l.LogAppendDot()
                    time.sleep(self.THREAD_MONITOR_DELAY)
            except KeyboardInterrupt:
                # Remember the termination so _StoreResults reports failure.
                self._terminated = True
                self.l.LogError("Ctrl-c pressed. Cleaning up...")
                experiment.Terminate()
                raise
            except SystemExit:
                self._terminated = True
                self.l.LogError("Unexpected exit. Cleaning up...")
                experiment.Terminate()
                raise
        finally:
            experiment.Cleanup()

    def _PrintTable(self, experiment):
        """Log the text results report of the experiment."""
        self.l.LogOutput(
            TextResultsReport.FromExperiment(experiment).GetReport()
        )

    @staticmethod
    def _BuildMessageBody(experiment):
        """Return the HTML message body shared by the email and msg_body.html.

        The body is the email-flavoured text report followed by a line
        naming the results directory, wrapped in a <pre> element.
        """
        text_report = TextResultsReport.FromExperiment(
            experiment, True
        ).GetReport()
        text_report += (
            "\nResults are stored in %s.\n" % experiment.results_directory
        )
        return "<pre style='font-size: 13px'>%s</pre>" % text_report

    def _Email(self, experiment):
        """Email the results report to experiment.email_to and the user.

        Nothing is sent when the "no_email" config is set, or when every
        benchmark run was a cache hit and no recipient was configured.

        Args:
            experiment: The experiment whose results are mailed.
        """
        # Only email by default if a new run was completed.
        send_mail = any(
            not benchmark_run.cache_hit
            for benchmark_run in experiment.benchmark_runs
        )
        if (not send_mail and not experiment.email_to) or config.GetConfig(
            "no_email"
        ):
            return

        label_names = [label.name for label in experiment.labels]
        subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))

        text_report = self._BuildMessageBody(experiment)
        html_report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        attachment = EmailSender.Attachment("report.html", html_report)
        # Work on a copy: appending to experiment.email_to directly would
        # mutate the experiment's configuration on every call.
        email_to = list(experiment.email_to or [])
        email_to.append(getpass.getuser())
        EmailSender().SendEmail(
            email_to,
            subject,
            text_report,
            attachments=[attachment],
            msg_type="html",
        )

    def _StoreResults(self, experiment):
        """Write all result artifacts into experiment.results_directory.

        The directory is removed and recreated first. It then receives the
        experiment file, topstats.log, the per-run results, results.html,
        optionally a JSON report, and msg_body.html.

        Args:
            experiment: The experiment whose results are stored.

        Returns:
            ALL_FAILED if the run was terminated or no benchmark run with a
            result has a zero return value (only the experiment file and
            topstats.log are written in the latter case); HAS_FAILURE if at
            least one run with a result has a non-zero return value;
            SUCCEEDED otherwise.
        """
        if self._terminated:
            return self.ALL_FAILED

        results_directory = experiment.results_directory
        FileUtils().RmDir(results_directory)
        FileUtils().MkDirP(results_directory)
        self.l.LogOutput("Storing experiment file in %s." % results_directory)
        experiment_file_path = os.path.join(results_directory, "experiment.exp")
        FileUtils().WriteFile(experiment_file_path, experiment.experiment_file)

        has_failure = False
        all_failed = True

        topstats_file = os.path.join(results_directory, "topstats.log")
        self.l.LogOutput(
            "Storing top statistics of each benchmark run into %s."
            % topstats_file
        )
        with open(topstats_file, "w") as top_fd:
            for benchmark_run in experiment.benchmark_runs:
                if benchmark_run.result:
                    # FIXME: Pylint has a bug suggesting the following change, which
                    # should be fixed in pylint 2.0. Resolve this after pylint >= 2.0.
                    # Bug: https://github.com/PyCQA/pylint/issues/1984
                    # pylint: disable=simplifiable-if-statement
                    if benchmark_run.result.retval:
                        has_failure = True
                    else:
                        all_failed = False
                    # Header with benchmark run name.
                    top_fd.write("%s\n" % str(benchmark_run))
                    # Formatted string with top statistics.
                    top_fd.write(benchmark_run.result.FormatStringTopCommands())
                    top_fd.write("\n\n")

        if all_failed:
            return self.ALL_FAILED

        self.l.LogOutput("Storing results of each benchmark run.")
        for benchmark_run in experiment.benchmark_runs:
            if benchmark_run.result:
                # Directory name: the run name reduced to its alphanumeric
                # characters.
                benchmark_run_name = "".join(
                    ch for ch in benchmark_run.name if ch.isalnum()
                )
                benchmark_run_path = os.path.join(
                    results_directory, benchmark_run_name
                )
                if experiment.compress_results:
                    benchmark_run.result.CompressResultsTo(benchmark_run_path)
                else:
                    benchmark_run.result.CopyResultsTo(benchmark_run_path)
                benchmark_run.result.CleanUp(
                    benchmark_run.benchmark.rm_chroot_tmp
                )

        self.l.LogOutput("Storing results report in %s." % results_directory)
        results_table_path = os.path.join(results_directory, "results.html")
        report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        if self.json_report:
            json_report = JSONResultsReport.FromExperiment(
                experiment, json_args={"indent": 2}
            )
            _WriteJSONReportToFile(experiment, results_directory, json_report)

        FileUtils().WriteFile(results_table_path, report)

        self.l.LogOutput(
            "Storing email message body in %s." % results_directory
        )
        msg_file_path = os.path.join(results_directory, "msg_body.html")
        msg_body = self._BuildMessageBody(experiment)
        FileUtils().WriteFile(msg_file_path, msg_body)

        return self.SUCCEEDED if not has_failure else self.HAS_FAILURE

    def Run(self):
        """Run the experiment, then report, store and email its results.

        The report is printed and the results are stored even when the
        run itself raises; in that case the exception propagates once the
        reporting is done.

        Returns:
            The status code produced by _StoreResults(): SUCCEEDED,
            HAS_FAILURE or ALL_FAILED.
        """
        try:
            self._Run(self._experiment)
        finally:
            # Always print the report at the end of the run.
            self._PrintTable(self._experiment)
            ret = self._StoreResults(self._experiment)
            if ret != self.ALL_FAILED:
                self._Email(self._experiment)
        return ret
|
|
|
|
|
|
class MockExperimentRunner(ExperimentRunner):
    """ExperimentRunner stand-in that only logs what each step would do."""

    def __init__(self, experiment, json_report):
        """Set up the base runner, leaving its optional arguments at defaults."""
        super(MockExperimentRunner, self).__init__(experiment, json_report)

    def _Run(self, experiment):
        """Log the experiment's name instead of running it."""
        message = "Would run the following experiment: '%s'." % experiment.name
        self.l.LogOutput(message)

    def _PrintTable(self, experiment):
        """Log a placeholder instead of printing the results table."""
        message = "Would print the experiment table."
        self.l.LogOutput(message)

    def _Email(self, experiment):
        """Log a placeholder instead of sending the result email."""
        message = "Would send result email."
        self.l.LogOutput(message)

    def _StoreResults(self, experiment):
        """Log a placeholder instead of storing results; returns None."""
        message = "Would store the results."
        self.l.LogOutput(message)
|