/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd.h"

#include <csignal>
#include <optional>
#include <set>

#include <libsnapshot/snapuserd_client.h>

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "

Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device,
                     const std::string& backing_device) {
    misc_name_ = misc_name;
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = "/dev/dm-user/" + misc_name;
}

bool Snapuserd::InitializeWorkers() {
    for (int i = 0; i < NUM_THREADS_PER_PARTITION; i++) {
        std::unique_ptr<WorkerThread> wt = std::make_unique<WorkerThread>(
                cow_device_, backing_store_device_, control_device_, misc_name_, GetSharedPtr());

        worker_threads_.push_back(std::move(wt));
    }

    read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_,
                                                           misc_name_, GetSharedPtr());
    return true;
}

bool Snapuserd::CommitMerge(int num_merge_ops) {
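    // Note: mapped_addr_ points at the mmap'ed start of the COW device (see MmapMetadata),
    // so the CowHeader can be updated in place. Persisting num_merge_ops in the header
    // (and msync'ing the first block below) is what lets an interrupted merge resume
    // from the correct operation after a restart.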
    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
    ch->num_merge_ops += num_merge_ops;

    if (read_ahead_feature_ && read_ahead_ops_.size() > 0) {
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadInProgress;
    }

    int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
    if (ret < 0) {
        PLOG(ERROR) << "msync header failed: " << ret;
        return false;
    }

    merge_initiated_ = true;

    return true;
}

void Snapuserd::PrepareReadAhead() {
    if (!read_ahead_feature_) {
        return;
    }

    struct BufferState* ra_state = GetBufferState();
    // Check if the data has to be re-constructed from the COW device
    if (ra_state->read_ahead_state == kCowReadAheadDone) {
        populate_data_from_cow_ = true;
    } else {
        populate_data_from_cow_ = false;
    }

    StartReadAhead();
}

bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) {
    if (!lock->owns_lock()) {
        SNAP_LOG(ERROR) << "GetRABuffer - Lock not held";
        return false;
    }
    std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);

    // This will be true only for IOs generated as part of reading the root
    // filesystem. IOs related to merge should always be in the read-ahead cache.
    if (it == read_ahead_buffer_map_.end()) {
        return false;
    }

    // Theoretically, we can send the data back from the read-ahead buffer
    // all the way to the kernel without memcpy. However, if the IO is
    // un-aligned, the wrapper function will need to touch the read-ahead
    // buffers and the transitions will be a bit more complicated.
    memcpy(buffer, it->second, BLOCK_SZ);
    return true;
}

// ========== State transition functions for read-ahead operations ===========
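// Rough summary of the READ_AHEAD_IO_TRANSITION states used below:
//   READ_AHEAD_BEGIN       - set by StartReadAhead(); worker threads request the next batch
//   READ_AHEAD_IN_PROGRESS - set in WaitForMergeToComplete(); read-ahead thread is caching blocks
//   IO_IN_PROGRESS         - set in ReadAheadIOCompleted(); cached data is ready for workers
//   READ_AHEAD_FAILURE     - set in ReadAheadIOFailed(); GetReadAheadPopulatedBuffer() fails
//   IO_TERMINATED          - set in MergeCompleted(); read-ahead thread can exit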

bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) {
    if (!read_ahead_feature_) {
        return false;
    }

    {
        std::unique_lock<std::mutex> lock(lock_);
        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) {
            return GetRABuffer(&lock, block, buffer);
        }
    }

    {
        // Read-ahead thread IO is in progress. Wait for it to complete.
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) {
            cv.wait(lock);
        }

        return GetRABuffer(&lock, block, buffer);
    }
}

// This is invoked by the read-ahead thread, waiting for merge IOs
// to complete.
bool Snapuserd::WaitForMergeToComplete() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) {
            return false;
        }

        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS;
        return true;
    }
}

// This is invoked during the launch of worker threads. We wait
// for the read-ahead thread to be fully up before worker threads
// are launched; otherwise we will have a race between worker threads
// and the read-ahead thread, specifically during re-construction.
bool Snapuserd::WaitForReadAheadToStart() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        return true;
    }
}

// Invoked by worker threads when a sequence of merge operations
// is complete, notifying the read-ahead thread to make forward
// progress.
void Snapuserd::StartReadAhead() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN;
    }

    cv.notify_one();
}

void Snapuserd::MergeCompleted() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED;
    }

    cv.notify_one();
}

bool Snapuserd::ReadAheadIOCompleted(bool sync) {
    if (sync) {
        // Flush the entire buffer region
        int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
            return false;
        }

        // Metadata and data are synced. Now, update the state.
        // We need to update the state after flushing data; if there is a crash
        // when read-ahead IO is in progress, the state of data in the COW file
        // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
        // in the scratch space is good and during the next reboot, the read-ahead
        // thread can safely re-construct the data.
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadDone;

        ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed to flush Readahead completion state...";
            return false;
        }
    }

    // Notify the worker threads
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS;
    }

    cv.notify_all();
    return true;
}

void Snapuserd::ReadAheadIOFailed() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE;
    }

    cv.notify_all();
}

//========== End of state transition functions ====================

bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
    uint32_t stride = exceptions_per_area_ + 1;
    lldiv_t divresult = lldiv(chunk, stride);

    return (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS);
}

// Find the next free chunk-id to be assigned. Check if the next free
// chunk-id represents a metadata page. If so, skip it.
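// Example (assuming exceptions_per_area_ == 256, i.e. stride == 257, and
// NUM_SNAPSHOT_HDR_CHUNKS == 1): chunk-ids 1, 258, 515, ... are metadata pages,
// so GetNextAllocatableChunkId(257) skips 258 and returns 259.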
chunk_t Snapuserd::GetNextAllocatableChunkId(chunk_t chunk) {
    chunk_t next_chunk = chunk + 1;

    if (IsChunkIdMetadata(next_chunk)) {
        next_chunk += 1;
    }
    return next_chunk;
}

void Snapuserd::CheckMergeCompletionStatus() {
    if (!merge_initiated_) {
        SNAP_LOG(INFO) << "Merge was not initiated. Total-data-ops: " << reader_->total_data_ops();
        return;
    }

    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);

    SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();
}

/*
 * Read the metadata from the COW device and
 * construct the metadata as required by the kernel.
 *
 * Please see the design doc on the kernel COW format.
 *
 * 1: Read the metadata from the internal COW device.
 * 2: There are 3 COW operations:
 *      a: Replace op
 *      b: Copy op
 *      c: Zero op
 * 3: For each of the 3 operations, op->new_block
 *    represents the block number in the base device
 *    for which one of the 3 operations has to be applied.
 *    This represents the old_chunk in the kernel COW format.
 * 4: We need to assign a new_chunk for a corresponding old_chunk.
 * 5: The algorithm is similar to how the kernel assigns chunk numbers
 *    while creating exceptions. However, there are a few cases
 *    which need to be addressed here:
 *      a: During the merge process, the kernel scans the metadata pages
 *         backwards when merge is initiated. Since we need
 *         to make sure that the merge ordering follows our COW format,
 *         we read the COW operations backwards and populate the
 *         metadata so that when the kernel starts merging backwards,
 *         those ops correspond to the beginning of our COW format.
 *      b: The kernel can merge successive operations if the two chunk IDs
 *         are contiguous. This can be problematic when there is a crash
 *         during merge, specifically when a merge operation has a dependency.
 *         These dependencies can only happen during copy operations.
 *
 *         To avoid this problem, we make sure overlapping copy operations
 *         are not batch merged.
 * 6: Use a monotonically increasing chunk number to assign the
 *    new_chunk.
 * 7: Each chunk-id represents either
 *      a: a metadata page or
 *      b: a data page
 * 8: A chunk-id representing a data page is stored in a map.
 * 9: A chunk-id representing a metadata page is converted into a vector
 *    index. We store this in a vector as the kernel requests metadata in
 *    two stages:
 *      a: When the initial dm-snapshot device is created, the kernel requests
 *         all the metadata and stores it in its internal data-structures.
 *      b: During merge, the kernel requests the same metadata once again.
 *    In both these cases, a quick lookup based on chunk-id is done.
 * 10: When the chunk number is incremented, we need to check whether
 *     the chunk represents a metadata page and, if so, skip it.
 * 11: Each 4k page will contain 256 disk exceptions. We call this
 *     exceptions_per_area_.
 * 12: The kernel will stop issuing metadata IO requests when the new-chunk ID is 0.
 */
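// Example (assuming 4 KiB chunks, i.e. 8 sectors per chunk): a replace op for
// op->new_block 10 that is assigned data_chunk_id 2 is exported to the kernel as
// disk_exception {old_chunk: 10, new_chunk: 2}; the replacement data is later
// served when the kernel reads chunk 2 (sector 16) of the dm-user device.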
bool Snapuserd::ReadMetadata() {
    reader_ = std::make_unique<CowReader>();
    CowHeader header;
    CowOptions options;
    bool metadata_found = false;
    int replace_ops = 0, zero_ops = 0, copy_ops = 0;

    SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file";

    if (!reader_->Parse(cow_fd_)) {
        SNAP_LOG(ERROR) << "Failed to parse";
        return false;
    }

    if (!reader_->GetHeader(&header)) {
        SNAP_LOG(ERROR) << "Failed to get header";
        return false;
    }

    if (!(header.block_size == BLOCK_SZ)) {
        SNAP_LOG(ERROR) << "Invalid header block size found: " << header.block_size;
        return false;
    }

    reader_->InitializeMerge();
    SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;

    if (!MmapMetadata()) {
        SNAP_LOG(ERROR) << "mmap failed";
        return false;
    }

    // Initialize the iterator for reading metadata
    cowop_riter_ = reader_->GetRevOpIter();

    exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);
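    // With the default 4 KiB chunk size (CHUNK_SIZE of 8 sectors x 512 bytes) and
    // 16-byte disk exceptions, this works out to 4096 / 16 = 256 exceptions per
    // area (see point 11 in the comment above).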

    // Start from chunk number 2. Chunk 0 represents the header and chunk 1
    // represents the first metadata page.
    chunk_t data_chunk_id = NUM_SNAPSHOT_HDR_CHUNKS + 1;
    size_t num_ops = 0;

    loff_t offset = 0;
    std::unique_ptr<uint8_t[]> de_ptr =
            std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));

    // This memset is important. The kernel will stop issuing IO when the new-chunk ID
    // is 0. When an area is not completely filled with all 256 exceptions,
    // this memset ensures that the metadata read is completed.
    memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

    while (!cowop_riter_->Done()) {
        const CowOperation* cow_op = &cowop_riter_->Get();
        struct disk_exception* de =
                reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);

        if (IsMetadataOp(*cow_op)) {
            cowop_riter_->Next();
            continue;
        }

        metadata_found = true;
        // This loop will handle all the replace and zero ops.
        // We will handle the copy ops later, as they require special
        // handling when assigning chunk-ids. Furthermore, we make
        // sure that replace/zero and copy ops are not batch merged;
        // hence the chunk_id is bumped before breaking out of this loop.
        if (cow_op->type == kCowCopyOp) {
            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            break;
        }

        if (cow_op->type == kCowReplaceOp) {
            replace_ops++;
        } else if (cow_op->type == kCowZeroOp) {
            zero_ops++;
        }

        // Construct the disk-exception
        de->old_chunk = cow_op->new_block;
        de->new_chunk = data_chunk_id;

        // Store operation pointer.
        chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
        num_ops += 1;
        offset += sizeof(struct disk_exception);
        cowop_riter_->Next();

        SNAP_LOG(DEBUG) << num_ops << ":"
                        << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

        if (num_ops == exceptions_per_area_) {
            // Store it in the vector at the right index. This maps the chunk-id to
            // the vector index.
            vec_.push_back(std::move(de_ptr));
            offset = 0;
            num_ops = 0;

            // Create buffer for next area
            de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                 sizeof(struct disk_exception));
            memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

            if (cowop_riter_->Done()) {
                vec_.push_back(std::move(de_ptr));
            }
        }

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
    }

    int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
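    // Number of copy ops whose data fits in the read-ahead data region per iteration.
    // With the default 2 MB scratch buffer minus 8 KiB of metadata (see
    // GetBufferDataSize), this is 2088960 / 4096 = 510 ops.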
    std::optional<chunk_t> prev_id = {};
    std::vector<const CowOperation*> vec;
    std::set<uint64_t> dest_blocks;
    std::set<uint64_t> source_blocks;
    size_t pending_copy_ops = exceptions_per_area_ - num_ops;
    uint64_t total_copy_ops = reader_->total_copy_ops();

    SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size()
                    << " Number of replace/zero ops completed in this area: " << num_ops
                    << " Pending copy ops for this area: " << pending_copy_ops;
    while (!cowop_riter_->Done()) {
        do {
            const CowOperation* cow_op = &cowop_riter_->Get();
            if (IsMetadataOp(*cow_op)) {
                cowop_riter_->Next();
                continue;
            }

            // We have two specific cases:
            //
            // =====================================================
            // Case 1: Overlapping copy regions
            //
            // Ex:
            //
            // Source -> Destination
            //
            // 1: 15 -> 18
            // 2: 16 -> 19
            // 3: 17 -> 20
            // 4: 18 -> 21
            // 5: 19 -> 22
            // 6: 20 -> 23
            //
            // We have 6 copy operations to be executed in OTA and there is an overlap.
            // Update-engine will write to the COW file as follows:
            //
            // Op-1: 20 -> 23
            // Op-2: 19 -> 22
            // Op-3: 18 -> 21
            // Op-4: 17 -> 20
            // Op-5: 16 -> 19
            // Op-6: 15 -> 18
            //
            // Note that the block numbers are contiguous. Hence, all 6 copy
            // operations can be batch merged. However, that will be
            // problematic if we have a crash, as blocks 20, 19, 18 would have
            // been overwritten and hence subsequent recovery may end up with
            // silent data corruption when op-1, op-2 and op-3 are
            // re-executed.
            //
            // To address the above problem, the read-ahead thread will
            // read all 6 source blocks and cache them in the scratch
            // space of the COW file. During merge, the read-ahead
            // thread will serve the blocks from the read-ahead cache.
            // If there is a crash during merge, then on subsequent reboot
            // the read-ahead thread will recover the data from the
            // scratch space and re-construct it, so that there
            // is no loss of data.
            //
            // Note that we will follow the same order of COW operations
            // as present in the COW file. This will make sure that
            // the merge of operations is done based on the ops present
            // in the file.
            //===========================================================
            if (prev_id.has_value()) {
                if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
                    break;
                }
            }
            metadata_found = true;
            pending_copy_ops -= 1;
            vec.push_back(cow_op);
            dest_blocks.insert(cow_op->source);
            source_blocks.insert(cow_op->new_block);
            prev_id = cow_op->new_block;
            cowop_riter_->Next();
        } while (!cowop_riter_->Done() && pending_copy_ops);

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
                        << " Area: " << vec_.size() << " Area offset: " << offset
                        << " Pending-copy-ops in this area: " << pending_copy_ops;

        for (size_t i = 0; i < vec.size(); i++) {
            struct disk_exception* de =
                    reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
            const CowOperation* cow_op = vec[i];

            de->old_chunk = cow_op->new_block;
            de->new_chunk = data_chunk_id;

            // Store operation pointer.
            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
            offset += sizeof(struct disk_exception);
            num_ops += 1;
            copy_ops++;
            if (read_ahead_feature_) {
                read_ahead_ops_.push_back(cow_op);
            }

            SNAP_LOG(DEBUG) << num_ops << ":"
                            << " Copy-op: "
                            << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

            if (num_ops == exceptions_per_area_) {
                // Store it in the vector at the right index. This maps the chunk-id to
                // the vector index.
                vec_.push_back(std::move(de_ptr));
                num_ops = 0;
                offset = 0;

                // Create buffer for next area
                de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                     sizeof(struct disk_exception));
                memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

                if (cowop_riter_->Done()) {
                    vec_.push_back(std::move(de_ptr));
                    SNAP_LOG(DEBUG) << "ReadMetadata() completed; Number of Areas: " << vec_.size();
                }

                if (!(pending_copy_ops == 0)) {
                    SNAP_LOG(ERROR)
                            << "Invalid pending_copy_ops: expected: 0 found: " << pending_copy_ops;
                    return false;
                }
                pending_copy_ops = exceptions_per_area_;
            }

            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            total_copy_ops -= 1;
            /*
             * Split the number of ops based on the size of the read-ahead buffer
             * region. We need to ensure that the kernel doesn't issue IO on blocks
             * which are not read by the read-ahead thread.
             */
            if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) {
                data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            }
        }
        vec.clear();
        dest_blocks.clear();
        source_blocks.clear();
        prev_id.reset();
    }

    // Partially filled area or there is no metadata.
    // If there is no metadata, fill with zeroes so that the kernel
    // is aware that merge is completed.
    if (num_ops || !metadata_found) {
        vec_.push_back(std::move(de_ptr));
        SNAP_LOG(DEBUG) << "ReadMetadata() completed. Partially filled area num_ops: " << num_ops
                        << "Areas : " << vec_.size();
    }

    chunk_vec_.shrink_to_fit();
    vec_.shrink_to_fit();
    read_ahead_ops_.shrink_to_fit();

    // Sort the vector based on sectors, as we need this during un-aligned access
    std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);

    SNAP_LOG(INFO) << "ReadMetadata completed. Final-chunk-id: " << data_chunk_id
                   << " Num Sector: " << ChunkToSector(data_chunk_id)
                   << " Replace-ops: " << replace_ops << " Zero-ops: " << zero_ops
                   << " Copy-ops: " << copy_ops << " Areas: " << vec_.size()
                   << " Num-ops-merged: " << header.num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();

    // Total number of sectors required for creating the dm-user device
    num_sectors_ = ChunkToSector(data_chunk_id);
    merge_initiated_ = false;
    PrepareReadAhead();

    return true;
}

bool Snapuserd::MmapMetadata() {
    CowHeader header;
    reader_->GetHeader(&header);

    if (header.major_version >= 2 && header.buffer_size > 0) {
        total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE;
        read_ahead_feature_ = true;
    } else {
        // mmap the first 4k page - older COW format
        total_mapped_addr_length_ = BLOCK_SZ;
        read_ahead_feature_ = false;
    }

    mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cow_fd_.get(), 0);
    if (mapped_addr_ == MAP_FAILED) {
        SNAP_LOG(ERROR) << "mmap metadata failed";
        return false;
    }

    return true;
}

void Snapuserd::UnmapBufferRegion() {
    int ret = munmap(mapped_addr_, total_mapped_addr_length_);
    if (ret < 0) {
        SNAP_PLOG(ERROR) << "munmap failed";
    }
}

void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
              unsigned int, const char* message) {
    if (severity == android::base::ERROR) {
        fprintf(stderr, "%s\n", message);
    } else {
        fprintf(stdout, "%s\n", message);
    }
}

bool Snapuserd::InitCowDevice() {
    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    return ReadMetadata();
}

/*
 * Entry point to launch threads
 */
bool Snapuserd::Start() {
    std::vector<std::future<bool>> threads;
    std::future<bool> ra_thread;
    bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0));

    // Start the read-ahead thread and wait
    // for it, as the data has to be re-constructed
    // from the COW device.
    if (rathread) {
        ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread,
                               read_ahead_thread_.get());
        if (!WaitForReadAheadToStart()) {
            SNAP_LOG(ERROR) << "Failed to start Read-ahead thread...";
            return false;
        }

        SNAP_LOG(INFO) << "Read-ahead thread started...";
    }

    // Launch worker threads
    for (int i = 0; i < worker_threads_.size(); i++) {
        threads.emplace_back(
                std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
    }

    bool ret = true;
    for (auto& t : threads) {
        ret = t.get() && ret;
    }

    if (rathread) {
        // Notify the read-ahead thread that all worker threads
        // are done. We need this explicit notification when
        // there is an IO failure or there was a switch
        // of the dm-user table, thus forcing the read-ahead
        // thread to wake up.
        MergeCompleted();
        ret = ret && ra_thread.get();
    }

    return ret;
}

uint64_t Snapuserd::GetBufferMetadataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.header_size + sizeof(BufferState);
    return size;
}

/*
 * Metadata for read-ahead is 16 bytes. For a 2 MB region, we will
 * end up with 8k (2 PAGE) worth of metadata. Thus, a 2MB buffer
 * region is split into:
 *
 * 1: 8k metadata
 * 2: The remaining space holds the read-ahead data
 *
 */
size_t Snapuserd::GetBufferMetadataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ;
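    // Assuming a 16-byte ScratchMetadata entry and the default 2 MB buffer_size,
    // this is (2097152 * 16) / 4096 = 8192 bytes - the "8k (2 PAGE)" of metadata
    // described in the comment above.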
    return metadata_bytes;
}

size_t Snapuserd::GetBufferDataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    return (header.header_size + GetBufferMetadataSize());
}

/*
 * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data.
 */
size_t Snapuserd::GetBufferDataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.buffer_size - GetBufferMetadataSize();
    return size;
}

struct BufferState* Snapuserd::GetBufferState() {
    CowHeader header;
    reader_->GetHeader(&header);

    struct BufferState* ra_state =
            reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size);
    return ra_state;
}

}  // namespace snapshot
}  // namespace android