You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
383 lines
15 KiB
383 lines
15 KiB
/*
|
|
* Copyright (C) 2023 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "./execute.h"
|
|
|
|
#include <linux/securebits.h>
|
|
#include <linux/uio.h>
|
|
#include <seccomp_policy.h>
|
|
#include <sys/capability.h>
|
|
#include <sys/personality.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/ptrace.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
|
|
#include <iostream>
|
|
#include <memory>
|
|
|
|
#include "./elf-utils.h"
|
|
#include "./registers.h"
|
|
#include "./shell-code.h"
|
|
|
|
namespace shell_as {
|
|
|
|
namespace {
|
|
|
|
// Capabilities are implemented as a 64-bit bit-vector. Therefore the maximum
|
|
// number of capabilities supported by a kernel is 64.
|
|
constexpr cap_value_t kMaxCapabilities = 64;
|
|
|
|
// Applies the pre-exec portions of |context| to the calling process: group id,
// supplementary groups, seccomp filter, user id and capabilities. Any field of
// |context| that holds no value is skipped.
//
// Returns true if every requested change succeeded. On the first failure an
// error is written to stderr and false is returned; changes that were already
// applied are not rolled back.
bool DropPreExecPrivileges(const shell_as::SecurityContext* context) {
  // The ordering here is important:
  //  (1) The platform's seccomp filters disallow setresgid, so it must come
  //      before the seccomp drop.
  //  (2) Adding seccomp filters must happen before setresuid because setresuid
  //      drops some capabilities which are required for seccomp.
  if (context->group_id.has_value() &&
      setresgid(context->group_id.value(), context->group_id.value(),
                context->group_id.value()) != 0) {
    std::cerr << "Unable to set group id: " << context->group_id.value()
              << std::endl;
    return false;
  }
  if (context->supplementary_group_ids.has_value() &&
      setgroups(context->supplementary_group_ids.value().size(),
                context->supplementary_group_ids.value().data()) != 0) {
    std::cerr << "Unable to set supplementary groups." << std::endl;
    return false;
  }

  if (context->seccomp_filter.has_value()) {
    // Defaults to true so that a filter value without a case below keeps the
    // previous behavior of being a no-op.
    bool filter_installed = true;
    switch (context->seccomp_filter.value()) {
      case shell_as::kAppFilter:
        filter_installed = set_app_seccomp_filter();
        break;
      case shell_as::kAppZygoteFilter:
        filter_installed = set_app_zygote_seccomp_filter();
        break;
      case shell_as::kSystemFilter:
        filter_installed = set_system_seccomp_filter();
        break;
    }
    // Continuing without the requested filter would run the target in a less
    // restricted context than the caller asked for.
    if (!filter_installed) {
      std::cerr << "Unable to install seccomp filter." << std::endl;
      return false;
    }
  }

  // This must be set prior to setresuid, otherwise that call will drop the
  // permitted set of capabilities.
  if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) {
    std::cerr << "Unable to set keep capabilities." << std::endl;
    return false;
  }

  if (context->user_id.has_value() &&
      setresuid(context->user_id.value(), context->user_id.value(),
                context->user_id.value()) != 0) {
    std::cerr << "Unable to set user id: " << context->user_id.value()
              << std::endl;
    return false;
  }

  // Capabilities must be reacquired after setresuid since it still modifies
  // capabilities, but it leaves the permitted set intact.
  if (context->capabilities.has_value()) {
    // The first step is to raise all the capabilities possible in all sets
    // including the inheritable set. This defines the superset of possible
    // capabilities that can be passed on after calling execve.
    //
    // The reason that all capabilities are raised in the inheritable set is due
    // to a limitation of libcap. libcap may not contain a capability definition
    // for all capabilities supported by the kernel. If this occurs, it will
    // silently ignore requests to raise unknown capabilities via cap_set_flag.
    //
    // However, when parsing a cap_t from a text value, libcap will treat "all"
    // as all possible 64 capability bits as set.
    cap_t all_capabilities = cap_from_text("all+pie");
    if (all_capabilities == nullptr) {
      std::cerr << "Unable to raise inheritable capability set." << std::endl;
      return false;
    }
    const bool raised_all = cap_set_proc(all_capabilities) == 0;
    cap_free(all_capabilities);
    if (!raised_all) {
      std::cerr << "Unable to raise inheritable capability set." << std::endl;
      return false;
    }

    // The second step is to raise the /desired/ capability subset in the
    // ambient capability set. These are the capabilities that will actually be
    // passed to the process after execve.
    if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) != 0) {
      std::cerr << "Unable to clear ambient capabilities." << std::endl;
      return false;
    }
    cap_t desired_capabilities = context->capabilities.value();
    for (cap_value_t cap = 0; cap < kMaxCapabilities; cap++) {
      // Skip capability values not supported by the kernel.
      if (!CAP_IS_SUPPORTED(cap)) {
        continue;
      }
      cap_flag_value_t value = CAP_CLEAR;
      if (cap_get_flag(desired_capabilities, cap, CAP_PERMITTED, &value) == 0 &&
          value == CAP_SET) {
        if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) != 0) {
          std::cerr << "Unable to raise capability " << cap
                    << " in the ambient set." << std::endl;
          return false;
        }
      }
    }

    // The final step is to raise the SECBIT_NOROOT flag. The kernel has special
    // case logic that treats root calling execve differently than other users.
    //
    // By default all bits in the permitted set prior to calling execve will be
    // raised after calling execve. This would ignore the work above and result
    // in the process to have all capabilities.
    //
    // Setting the SECBIT_NOROOT disables this special casing for root and
    // causes the kernel to treat it as any other UID.
    int64_t secure_bits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
    if (secure_bits < 0 ||
        prctl(PR_SET_SECUREBITS, secure_bits | SECBIT_NOROOT, 0, 0, 0) != 0) {
      std::cerr << "Unable to raise SECBIT_NOROOT." << std::endl;
      return false;
    }
  }
  return true;
}
|
|
|
|
// Returns the byte stored at |address| in the traced process |process|.
//
// PTRACE_PEEKDATA hands back a whole word; only the first byte of that word in
// memory order is returned. The ptrace result is not checked for errors.
uint8_t ReadChildByte(const pid_t process, const uintptr_t address) {
  const uintptr_t word = ptrace(PTRACE_PEEKDATA, process, address, nullptr);
  return *reinterpret_cast<const uint8_t*>(&word);
}
|
|
|
|
// Stores |value| at |address| in the traced process |process|.
//
// The word at |address| is fetched, its first byte in memory order is replaced
// and the word is written back. Going one byte at a time is not the fastest
// way to write into another process, but it avoids having to deal with
// differing word sizes and with buffers whose length is not a multiple of the
// native word size. Neither ptrace call is checked for errors.
void WriteChildByte(const pid_t process, const uintptr_t address,
                    const uint8_t value) {
  uintptr_t word = ptrace(PTRACE_PEEKDATA, process, address, nullptr);
  *reinterpret_cast<uint8_t*>(&word) = value;
  ptrace(PTRACE_POKEDATA, process, address, word);
}
|
|
|
|
void ReadChildMemory(const pid_t process, uintptr_t process_address,
|
|
uint8_t* bytes, size_t byte_count) {
|
|
for (; byte_count != 0; byte_count--, bytes++, process_address++) {
|
|
*bytes = ReadChildByte(process, process_address);
|
|
}
|
|
}
|
|
|
|
void WriteChildMemory(const pid_t process, uintptr_t process_address,
|
|
uint8_t const* bytes, size_t byte_count) {
|
|
for (; byte_count != 0; byte_count--, bytes++, process_address++) {
|
|
WriteChildByte(process, process_address, *bytes);
|
|
}
|
|
}
|
|
|
|
// Executes shell code in a target process.
|
|
//
|
|
// The following assumptions are made:
|
|
// * The process is currently being ptraced and that the process has already
|
|
// stopped.
|
|
// * The shell code will raise SIGSTOP when it has finished as signal that
|
|
// control flow should be handed back to the original code.
|
|
// * The shell code only alters registers and pushes values onto the stack.
|
|
//
|
|
// Execution is performed by overwriting the memory under the current
|
|
// instruction pointer with the shell code. After the shell code signals
|
|
// completion the original register state and memory are restored.
|
|
//
|
|
// If the above assumptions are met, then this function will leave the process
|
|
// in a stopped state that is equivalent to the original state.
|
|
bool ExecuteShellCode(const pid_t process, const uint8_t* shell_code,
|
|
const size_t shell_code_size) {
|
|
REGISTER_STRUCT registers;
|
|
struct iovec registers_iovec;
|
|
registers_iovec.iov_base = ®isters;
|
|
registers_iovec.iov_len = sizeof(REGISTER_STRUCT);
|
|
ptrace(PTRACE_GETREGSET, process, 1, ®isters_iovec);
|
|
|
|
std::unique_ptr<uint8_t[]> memory_backup(new uint8_t[shell_code_size]);
|
|
ReadChildMemory(process, PROGRAM_COUNTER(registers), memory_backup.get(),
|
|
shell_code_size);
|
|
WriteChildMemory(process, PROGRAM_COUNTER(registers), shell_code,
|
|
shell_code_size);
|
|
|
|
// Execute the shell code and wait for the signal that it has finished.
|
|
ptrace(PTRACE_CONT, process, NULL, NULL);
|
|
int status;
|
|
waitpid(process, &status, 0);
|
|
if (status >> 8 != SIGSTOP) {
|
|
std::cerr << "Failed to execute SELinux shellcode." << std::endl;
|
|
return false;
|
|
}
|
|
|
|
ptrace(PTRACE_SETREGSET, process, 1, ®isters_iovec);
|
|
WriteChildMemory(process, PROGRAM_COUNTER(registers), memory_backup.get(),
|
|
shell_code_size);
|
|
return true;
|
|
}
|
|
|
|
// Sets the program counter of the traced process |process_id| to
// |program_counter|, leaving the rest of the register set as it was read.
//
// Returns false if the register set cannot be read or written back.
bool SetProgramCounter(const pid_t process_id, uint64_t program_counter) {
  REGISTER_STRUCT registers;
  struct iovec registers_iovec;
  registers_iovec.iov_base = &registers;
  registers_iovec.iov_len = sizeof(REGISTER_STRUCT);
  if (ptrace(PTRACE_GETREGSET, process_id, 1, &registers_iovec) != 0) {
    return false;
  }
  PROGRAM_COUNTER(registers) = program_counter;
  return ptrace(PTRACE_SETREGSET, process_id, 1, &registers_iovec) == 0;
}
|
|
|
|
bool StepToEntryPoint(const pid_t process_id) {
|
|
bool is_arm_mode;
|
|
uint64_t entry_address;
|
|
if (!GetElfEntryPoint(process_id, &entry_address, &is_arm_mode)) {
|
|
std::cerr << "Not able to determine Elf entry point." << std::endl;
|
|
return false;
|
|
}
|
|
if (is_arm_mode) {
|
|
// TODO(willcoster): If there is a need to handle ARM mode instructions in
|
|
// addition to thumb instructions update this with ARM mode shell code.
|
|
std::cerr << "Attempting to run an ARM-mode binary. "
|
|
<< "shell-as currently only supports thumb-mode. "
|
|
<< "Bug willcoster@ if you run into this error." << std::endl;
|
|
return false;
|
|
}
|
|
|
|
int expected_signal = 0;
|
|
size_t trap_code_size = 0;
|
|
std::unique_ptr<uint8_t[]> trap_code =
|
|
GetTrapShellCode(&expected_signal, &trap_code_size);
|
|
std::unique_ptr<uint8_t[]> backup(new uint8_t[trap_code_size]);
|
|
|
|
// Set a break point at the entry point declared by the Elf file. When a
|
|
// statically linked binary is executed this will be the first instruction
|
|
// executed.
|
|
//
|
|
// When a dynamically linked binary is executed, the dynamic linker is
|
|
// executed first. This brings .so files into memory and resolves shared
|
|
// symbols. Once this process is finished, it jumps to the entry point
|
|
// declared in the Elf file.
|
|
ReadChildMemory(process_id, entry_address, backup.get(), trap_code_size);
|
|
WriteChildMemory(process_id, entry_address, trap_code.get(), trap_code_size);
|
|
ptrace(PTRACE_CONT, process_id, NULL, NULL);
|
|
int status;
|
|
waitpid(process_id, &status, 0);
|
|
if (status >> 8 != expected_signal) {
|
|
std::cerr << "Program exited unexpectedly while stepping to entry point."
|
|
<< std::endl;
|
|
std::cerr << "Expected status " << expected_signal << " but encountered "
|
|
<< (status >> 8) << std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (!SetProgramCounter(process_id, entry_address)) {
|
|
return false;
|
|
}
|
|
WriteChildMemory(process_id, entry_address, backup.get(), trap_code_size);
|
|
return true;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
bool ExecuteInContext(char* const executable_and_args[],
|
|
const shell_as::SecurityContext* context) {
|
|
// Getting an executable running in a lower privileged context is tricky with
|
|
// SELinux. The recommended approach in the documentation is to use setexeccon
|
|
// which sets the context on the next execve call.
|
|
//
|
|
// However, this doesn't work for unprivileged processes like untrusted apps
|
|
// in Android because they are not allowed to execute most binaries.
|
|
//
|
|
// To work around this, ptrace is used to inject shell code into the new
|
|
// process just after it has executed an execve syscall. This shell code then
|
|
// sets the desired SELinux context.
|
|
pid_t child = fork();
|
|
if (child == 0) {
|
|
// Disabling ASLR makes it easier to determine the entry point of the target
|
|
// executable.
|
|
personality(ADDR_NO_RANDOMIZE);
|
|
|
|
// Drop the privileges that can be dropped before executing the new binary
|
|
// and exit early if there is an issue.
|
|
if (!DropPreExecPrivileges(context)) {
|
|
exit(1);
|
|
}
|
|
|
|
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
|
|
raise(SIGSTOP); // Wait for the parent process to attach.
|
|
execv(executable_and_args[0], executable_and_args);
|
|
} else {
|
|
// Wait for the child to reach the SIGSTOP line above.
|
|
int status;
|
|
waitpid(child, &status, 0);
|
|
if ((status >> 8) != SIGSTOP) {
|
|
// If the first status is not SIGSTOP, then the child aborted early
|
|
// because it was not able to set the user and group IDs.
|
|
return false;
|
|
}
|
|
|
|
// Break inside the child's execv call.
|
|
ptrace(PTRACE_SETOPTIONS, child, NULL,
|
|
PTRACE_O_TRACEEXEC | PTRACE_O_EXITKILL);
|
|
ptrace(PTRACE_CONT, child, NULL, NULL);
|
|
waitpid(child, &status, 0);
|
|
if (status >> 8 != (SIGTRAP | PTRACE_EVENT_EXEC << 8)) {
|
|
std::cerr << "Failed to execute " << executable_and_args[0] << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Allow the dynamic linker to run before dropping to a lower SELinux
|
|
// context. This is required for executing in some very constrained domains
|
|
// like mediacodec.
|
|
//
|
|
// If the context was dropped before the dynamic linker runs, then when the
|
|
// linker attempts to read /proc/self/exe to determine dynamic symbol
|
|
// information, SELinux will kill the binary if the domain is not allowed to
|
|
// read the binary's executable file.
|
|
//
|
|
// This happens for example, when attempting to run any toybox binary (id,
|
|
// sh, etc) as mediacodec.
|
|
if (!StepToEntryPoint(child)) {
|
|
std::cerr << "Something bad happened stepping to the entry point."
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Run the SELinux shellcode in the child process before the child can
|
|
// execute any instructions in the newly loaded executable.
|
|
if (context->selinux_context.has_value()) {
|
|
size_t shell_code_size;
|
|
std::unique_ptr<uint8_t[]> shell_code = GetSELinuxShellCode(
|
|
context->selinux_context.value(), &shell_code_size);
|
|
bool success = ExecuteShellCode(child, shell_code.get(), shell_code_size);
|
|
if (!success) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Resume and detach from the child now that the SELinux context has been
|
|
// updated.
|
|
ptrace(PTRACE_DETACH, child, NULL, NULL);
|
|
waitpid(child, nullptr, 0);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // namespace shell_as
|