You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
6.6 KiB
174 lines
6.6 KiB
/*
|
|
* Copyright 2019 The libgav1 Authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
|
|
#define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <mutex> // NOLINT (unapproved c++11 header)
|
|
#include <new>
|
|
#include <utility>
|
|
|
|
#include "src/dsp/constants.h"
|
|
#include "src/utils/common.h"
|
|
#include "src/utils/compiler_attributes.h"
|
|
#include "src/utils/constants.h"
|
|
#include "src/utils/memory.h"
|
|
#include "src/utils/stack.h"
|
|
|
|
namespace libgav1 {
|
|
|
|
// Buffer to facilitate decoding a superblock.
|
|
struct TileScratchBuffer : public MaxAlignedAllocable {
|
|
static constexpr int kBlockDecodedStride = 34;
|
|
|
|
LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) {
|
|
#if LIBGAV1_MAX_BITDEPTH >= 10
|
|
const int pixel_size = (bitdepth == 8) ? 1 : 2;
|
|
#else
|
|
assert(bitdepth == 8);
|
|
static_cast<void>(bitdepth);
|
|
const int pixel_size = 1;
|
|
#endif
|
|
|
|
static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, "");
|
|
constexpr int unaligned_convolve_buffer_stride =
|
|
kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
|
|
kConvolveScaleBorderRight;
|
|
convolve_block_buffer_stride = Align<ptrdiff_t>(
|
|
unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
|
|
constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
|
|
kConvolveBorderLeftTop +
|
|
kConvolveBorderBottom;
|
|
|
|
convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>(
|
|
kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride);
|
|
#if LIBGAV1_MSAN
|
|
// Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero
|
|
// value to aid in future debugging.
|
|
memset(convolve_block_buffer.get(), 0x66,
|
|
convolve_buffer_height * convolve_block_buffer_stride);
|
|
#endif
|
|
|
|
return convolve_block_buffer != nullptr;
|
|
}
|
|
|
|
// kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the
|
|
// prediction block size. This buffer is used to store that mask. The masks
|
|
// will be created for the Y plane and will be re-used for the U & V planes.
|
|
alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels];
|
|
|
|
// For each instance of the TileScratchBuffer, only one of the following
|
|
// buffers will be used at any given time, so it is ok to share them in a
|
|
// union.
|
|
union {
|
|
// Buffers used for prediction process.
|
|
// Compound prediction calculations always output 16-bit values. Depending
|
|
// on the bitdepth the values may be treated as int16_t or uint16_t. See
|
|
// src/dsp/convolve.cc and src/dsp/warp.cc for explanations.
|
|
// Inter/intra calculations output Pixel values.
|
|
// These buffers always use width as the stride. This enables packing the
|
|
// values in and simplifies loads/stores for small values.
|
|
|
|
// 10/12 bit compound prediction and 10/12 bit inter/intra prediction.
|
|
alignas(kMaxAlignment) uint16_t
|
|
prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels];
|
|
// 8 bit compound prediction buffer.
|
|
alignas(kMaxAlignment) int16_t
|
|
compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels];
|
|
|
|
// Union usage note: This is used only by functions in the "intra"
|
|
// prediction path.
|
|
//
|
|
// Buffer used for storing subsampled luma samples needed for CFL
|
|
// prediction. This buffer is used to avoid repetition of the subsampling
|
|
// for the V plane when it is already done for the U plane.
|
|
int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride];
|
|
};
|
|
|
|
// Buffer used for convolve. The maximum size required for this buffer is:
|
|
// maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263.
|
|
// maximum block stride (with scaling and border aligned to 16) =
|
|
// (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size.
|
|
// Where pixel_size is (bitdepth == 8) ? 1 : 2.
|
|
// Has an alignment of kMaxAlignment when allocated.
|
|
AlignedUniquePtr<uint8_t> convolve_block_buffer;
|
|
ptrdiff_t convolve_block_buffer_stride;
|
|
|
|
// Flag indicating whether the data in |cfl_luma_buffer| is valid.
|
|
bool cfl_luma_buffer_valid;
|
|
|
|
// Equivalent to BlockDecoded array in the spec. This stores the decoded
|
|
// state of every 4x4 block in a superblock. It has 1 row/column border on
|
|
// all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the
|
|
// spec uses "-1" as an index to access the left and top borders. In the
|
|
// code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So
|
|
// all accesses into this array will be offset by +1 when compared with the
|
|
// spec.
|
|
bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride];
|
|
};
|
|
|
|
class TileScratchBufferPool {
|
|
public:
|
|
void Reset(int bitdepth) {
|
|
if (bitdepth_ == bitdepth) return;
|
|
#if LIBGAV1_MAX_BITDEPTH >= 10
|
|
if (bitdepth_ == 8 && bitdepth != 8) {
|
|
// We are going from a pixel size of 1 to a pixel size of 2. So invalidate
|
|
// the stack.
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
while (!buffers_.Empty()) {
|
|
buffers_.Pop();
|
|
}
|
|
}
|
|
#endif
|
|
bitdepth_ = bitdepth;
|
|
}
|
|
|
|
std::unique_ptr<TileScratchBuffer> Get() {
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
if (buffers_.Empty()) {
|
|
std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow)
|
|
TileScratchBuffer);
|
|
if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) {
|
|
return nullptr;
|
|
}
|
|
return scratch_buffer;
|
|
}
|
|
return buffers_.Pop();
|
|
}
|
|
|
|
void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) {
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
buffers_.Push(std::move(scratch_buffer));
|
|
}
|
|
|
|
private:
|
|
std::mutex mutex_;
|
|
// We will never need more than kMaxThreads scratch buffers since that is the
|
|
// maximum amount of work that will be done at any given time.
|
|
Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_
|
|
LIBGAV1_GUARDED_BY(mutex_);
|
|
int bitdepth_ = 0;
|
|
};
|
|
|
|
} // namespace libgav1
|
|
|
|
#endif // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
|