mirror of
https://github.com/ScrelliCopter/VGM-Tools
synced 2025-02-21 04:09:25 +11:00
559 lines
12 KiB
C
559 lines
12 KiB
C
/* (c) 2017 Alex Barney (MIT) */
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <stdbool.h>
|
|
#include <math.h>
|
|
#include <float.h>
|
|
#include <string.h>
|
|
#include "../common.h"
|
|
#include "dsptool.h"
|
|
|
|
/* Temporal Vector
|
|
* A contiguous history of 3 samples starting with
|
|
* 'current' and going 2 backwards
|
|
*/
|
|
typedef double tvec[3];
|
|
static void DSPEncodeFrame(short pcmInOut[16], int sampleCount, unsigned char adpcmOut[8], const short coefsIn[8][2]);
|
|
|
|
void encode(int16_t* src, uint8_t* dst, ADPCMINFO* cxt, uint32_t samples)
|
|
{
|
|
int16_t* coefs = cxt->coef;
|
|
correlateCoefs(src, samples, coefs);
|
|
|
|
uint32_t frameCount = samples / SAMPLES_PER_FRAME + (samples % SAMPLES_PER_FRAME != 0);
|
|
|
|
int16_t* pcm = src;
|
|
uint8_t* adpcm = dst;
|
|
int16_t pcmFrame[SAMPLES_PER_FRAME + 2] = { 0 };
|
|
uint8_t adpcmFrame[BYTES_PER_FRAME] = { 0 };
|
|
|
|
for (uint32_t i = 0; i < frameCount; ++i, pcm += SAMPLES_PER_FRAME, adpcm += BYTES_PER_FRAME)
|
|
{
|
|
uint32_t sampleCount = MIN(samples - i * SAMPLES_PER_FRAME, SAMPLES_PER_FRAME);
|
|
memset(pcmFrame + 2, 0, SAMPLES_PER_FRAME * sizeof(int16_t));
|
|
memcpy(pcmFrame + 2, pcm, sampleCount * sizeof(int16_t));
|
|
|
|
DSPEncodeFrame(pcmFrame, SAMPLES_PER_FRAME, adpcmFrame, (const short(*)[2])&coefs[0]);
|
|
|
|
pcmFrame[0] = pcmFrame[14];
|
|
pcmFrame[1] = pcmFrame[15];
|
|
|
|
memcpy(adpcm, adpcmFrame, getBytesForAdpcmSamples(sampleCount));
|
|
}
|
|
|
|
cxt->gain = 0;
|
|
cxt->pred_scale = *dst;
|
|
cxt->yn1 = 0;
|
|
cxt->yn2 = 0;
|
|
}
|
|
|
|
static void InnerProductMerge(tvec vecOut, const short pcmBuf[14])
|
|
{
|
|
for (int i = 0; i <= 2; i++)
|
|
{
|
|
vecOut[i] = 0.0f;
|
|
for (int x = 0; x < 14; x++)
|
|
vecOut[i] -= pcmBuf[x - i] * pcmBuf[x];
|
|
}
|
|
}
|
|
|
|
static void OuterProductMerge(tvec mtxOut[3], const short pcmBuf[14])
|
|
{
|
|
for (int x = 1; x <= 2; x++)
|
|
for (int y = 1; y <= 2; y++)
|
|
{
|
|
mtxOut[x][y] = 0.0;
|
|
for (int z = 0; z < 14; z++)
|
|
mtxOut[x][y] += pcmBuf[z - x] * pcmBuf[z - y];
|
|
}
|
|
}
|
|
|
|
static bool AnalyzeRanges(tvec mtx[3], int* vecIdxsOut)
|
|
{
|
|
double recips[3];
|
|
double val, tmp, min, max;
|
|
|
|
// Get greatest distance from zero
|
|
for (int x = 1; x <= 2; x++)
|
|
{
|
|
val = MAX(fabs(mtx[x][1]), fabs(mtx[x][2]));
|
|
if (val < DBL_EPSILON)
|
|
return true;
|
|
|
|
recips[x] = 1.0 / val;
|
|
}
|
|
|
|
int maxIndex = 0;
|
|
for (int i = 1; i <= 2; i++)
|
|
{
|
|
for (int x = 1; x < i; x++)
|
|
{
|
|
tmp = mtx[x][i];
|
|
for (int y = 1; y < x; y++)
|
|
tmp -= mtx[x][y] * mtx[y][i];
|
|
mtx[x][i] = tmp;
|
|
}
|
|
|
|
val = 0.0;
|
|
for (int x = i; x <= 2; x++)
|
|
{
|
|
tmp = mtx[x][i];
|
|
for (int y = 1; y < i; y++)
|
|
tmp -= mtx[x][y] * mtx[y][i];
|
|
|
|
mtx[x][i] = tmp;
|
|
tmp = fabs(tmp) * recips[x];
|
|
if (tmp >= val)
|
|
{
|
|
val = tmp;
|
|
maxIndex = x;
|
|
}
|
|
}
|
|
|
|
if (maxIndex != i)
|
|
{
|
|
for (int y = 1; y <= 2; y++)
|
|
{
|
|
tmp = mtx[maxIndex][y];
|
|
mtx[maxIndex][y] = mtx[i][y];
|
|
mtx[i][y] = tmp;
|
|
}
|
|
recips[maxIndex] = recips[i];
|
|
}
|
|
|
|
vecIdxsOut[i] = maxIndex;
|
|
|
|
if (mtx[i][i] == 0.0)
|
|
return true;
|
|
|
|
if (i != 2)
|
|
{
|
|
tmp = 1.0 / mtx[i][i];
|
|
for (int x = i + 1; x <= 2; x++)
|
|
mtx[x][i] *= tmp;
|
|
}
|
|
}
|
|
|
|
// Get range
|
|
min = 1.0e10;
|
|
max = 0.0;
|
|
for (int i = 1; i <= 2; i++)
|
|
{
|
|
tmp = fabs(mtx[i][i]);
|
|
if (tmp < min)
|
|
min = tmp;
|
|
if (tmp > max)
|
|
max = tmp;
|
|
}
|
|
|
|
if (min / max < 1.0e-10)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static void BidirectionalFilter(tvec mtx[3], const int* vecIdxs, tvec vecOut)
|
|
{
|
|
double tmp;
|
|
|
|
for (int i = 1, x = 0; i <= 2; i++)
|
|
{
|
|
int index = vecIdxs[i];
|
|
tmp = vecOut[index];
|
|
vecOut[index] = vecOut[i];
|
|
if (x != 0)
|
|
for (int y = x; y <= i - 1; y++)
|
|
tmp -= vecOut[y] * mtx[i][y];
|
|
else if (tmp != 0.0)
|
|
x = i;
|
|
vecOut[i] = tmp;
|
|
}
|
|
|
|
for (int i = 2; i > 0; i--)
|
|
{
|
|
tmp = vecOut[i];
|
|
for (int y = i + 1; y <= 2; y++)
|
|
tmp -= vecOut[y] * mtx[i][y];
|
|
vecOut[i] = tmp / mtx[i][i];
|
|
}
|
|
|
|
vecOut[0] = 1.0;
|
|
}
|
|
|
|
static bool QuadraticMerge(tvec inOutVec)
|
|
{
|
|
double v0, v1, v2 = inOutVec[2];
|
|
double tmp = 1.0 - (v2 * v2);
|
|
|
|
if (tmp == 0.0)
|
|
return true;
|
|
|
|
v0 = (inOutVec[0] - (v2 * v2)) / tmp;
|
|
v1 = (inOutVec[1] - (inOutVec[1] * v2)) / tmp;
|
|
|
|
inOutVec[0] = v0;
|
|
inOutVec[1] = v1;
|
|
|
|
return fabs(v1) > 1.0;
|
|
}
|
|
|
|
static void FinishRecord(tvec in, tvec out)
|
|
{
|
|
for (int z = 1; z <= 2; z++)
|
|
{
|
|
if (in[z] >= 1.0)
|
|
in[z] = 0.9999999999;
|
|
else if (in[z] <= -1.0)
|
|
in[z] = -0.9999999999;
|
|
}
|
|
out[0] = 1.0;
|
|
out[1] = (in[2] * in[1]) + in[1];
|
|
out[2] = in[2];
|
|
}
|
|
|
|
static void MatrixFilter(const tvec src, tvec dst)
|
|
{
|
|
tvec mtx[3];
|
|
|
|
mtx[2][0] = 1.0;
|
|
for (int i = 1; i <= 2; i++)
|
|
mtx[2][i] = -src[i];
|
|
|
|
for (int i = 2; i > 0; i--)
|
|
{
|
|
double val = 1.0 - (mtx[i][i] * mtx[i][i]);
|
|
for (int y = 1; y <= i; y++)
|
|
mtx[i - 1][y] = ((mtx[i][i] * mtx[i][y]) + mtx[i][y]) / val;
|
|
}
|
|
|
|
dst[0] = 1.0;
|
|
for (int i = 1; i <= 2; i++)
|
|
{
|
|
dst[i] = 0.0;
|
|
for (int y = 1; y <= i; y++)
|
|
dst[i] += mtx[i][y] * dst[i - y];
|
|
}
|
|
}
|
|
|
|
static void MergeFinishRecord(const tvec src, tvec dst)
|
|
{
|
|
tvec tmp;
|
|
double val = src[0];
|
|
|
|
dst[0] = 1.0;
|
|
for (int i = 1; i <= 2; i++)
|
|
{
|
|
double v2 = 0.0;
|
|
for (int y = 1; y < i; y++)
|
|
v2 += dst[y] * src[i - y];
|
|
|
|
if (val > 0.0)
|
|
dst[i] = -(v2 + src[i]) / val;
|
|
else
|
|
dst[i] = 0.0;
|
|
|
|
tmp[i] = dst[i];
|
|
|
|
for (int y = 1; y < i; y++)
|
|
dst[y] += dst[i] * dst[i - y];
|
|
|
|
val *= 1.0 - (dst[i] * dst[i]);
|
|
}
|
|
|
|
FinishRecord(tmp, dst);
|
|
}
|
|
|
|
static double ContrastVectors(const tvec source1, const tvec source2)
|
|
{
|
|
double val = (source2[2] * source2[1] + -source2[1]) / (1.0 - source2[2] * source2[2]);
|
|
double val1 = (source1[0] * source1[0]) + (source1[1] * source1[1]) + (source1[2] * source1[2]);
|
|
double val2 = (source1[0] * source1[1]) + (source1[1] * source1[2]);
|
|
double val3 = source1[0] * source1[2];
|
|
return val1 + (2.0 * val * val2) + (2.0 * (-source2[1] * val + -source2[2]) * val3);
|
|
}
|
|
|
|
static void FilterRecords(tvec vecBest[8], int exp, tvec records[], int recordCount)
|
|
{
|
|
tvec bufferList[8];
|
|
|
|
int buffer1[8];
|
|
tvec buffer2;
|
|
|
|
int index;
|
|
double value, tempVal = 0;
|
|
|
|
for (int x = 0; x < 2; x++)
|
|
{
|
|
for (int y = 0; y < exp; y++)
|
|
{
|
|
buffer1[y] = 0;
|
|
for (int i = 0; i <= 2; i++)
|
|
bufferList[y][i] = 0.0;
|
|
}
|
|
for (int z = 0; z < recordCount; z++)
|
|
{
|
|
index = 0;
|
|
value = 1.0e30;
|
|
for (int i = 0; i < exp; i++)
|
|
{
|
|
tempVal = ContrastVectors(vecBest[i], records[z]);
|
|
if (tempVal < value)
|
|
{
|
|
value = tempVal;
|
|
index = i;
|
|
}
|
|
}
|
|
buffer1[index]++;
|
|
MatrixFilter(records[z], buffer2);
|
|
for (int i = 0; i <= 2; i++)
|
|
bufferList[index][i] += buffer2[i];
|
|
}
|
|
|
|
for (int i = 0; i < exp; i++)
|
|
if (buffer1[i] > 0)
|
|
for (int y = 0; y <= 2; y++)
|
|
bufferList[i][y] /= buffer1[i];
|
|
|
|
for (int i = 0; i < exp; i++)
|
|
MergeFinishRecord(bufferList[i], vecBest[i]);
|
|
}
|
|
}
|
|
|
|
void correlateCoefs(int16_t* source, uint32_t samples, int16_t* coefsOut)
|
|
{
|
|
uint32_t numFrames = (samples + 13) / 14;
|
|
uint32_t frameSamples;
|
|
|
|
short* blockBuffer = (short*)calloc(sizeof(short), 0x3800);
|
|
short pcmHistBuffer[2][14] = { 0 };
|
|
|
|
tvec vec1;
|
|
tvec vec2;
|
|
|
|
tvec mtx[3];
|
|
int vecIdxs[3];
|
|
|
|
tvec* records = (tvec*)calloc(sizeof(tvec), numFrames * 2);
|
|
int recordCount = 0;
|
|
|
|
tvec vecBest[8];
|
|
|
|
// Iterate though 1024-block frames
|
|
for (uint32_t x = samples; x > 0;)
|
|
{
|
|
if (x > 0x3800) // Full 1024-block frame
|
|
{
|
|
frameSamples = 0x3800;
|
|
x -= 0x3800;
|
|
}
|
|
else // Partial frame
|
|
{
|
|
// Zero lingering block samples
|
|
frameSamples = x;
|
|
for (int z = 0; z < 14 && z + frameSamples < 0x3800; z++)
|
|
blockBuffer[frameSamples + z] = 0;
|
|
x = 0;
|
|
}
|
|
|
|
// Copy (potentially non-frame-aligned PCM samples into aligned buffer)
|
|
memcpy(blockBuffer, source, frameSamples * sizeof(short));
|
|
source += frameSamples;
|
|
|
|
|
|
for (uint32_t i = 0; i < frameSamples;)
|
|
{
|
|
for (int z = 0; z < 14; z++)
|
|
pcmHistBuffer[0][z] = pcmHistBuffer[1][z];
|
|
for (int z = 0; z < 14; z++)
|
|
pcmHistBuffer[1][z] = blockBuffer[i++];
|
|
|
|
InnerProductMerge(vec1, pcmHistBuffer[1]);
|
|
if (fabs(vec1[0]) > 10.0)
|
|
{
|
|
OuterProductMerge(mtx, pcmHistBuffer[1]);
|
|
if (!AnalyzeRanges(mtx, vecIdxs))
|
|
{
|
|
BidirectionalFilter(mtx, vecIdxs, vec1);
|
|
if (!QuadraticMerge(vec1))
|
|
{
|
|
FinishRecord(vec1, records[recordCount]);
|
|
recordCount++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
vec1[0] = 1.0;
|
|
vec1[1] = 0.0;
|
|
vec1[2] = 0.0;
|
|
|
|
for (int z = 0; z < recordCount; z++)
|
|
{
|
|
MatrixFilter(records[z], vecBest[0]);
|
|
for (int y = 1; y <= 2; y++)
|
|
vec1[y] += vecBest[0][y];
|
|
}
|
|
for (int y = 1; y <= 2; y++)
|
|
vec1[y] /= recordCount;
|
|
|
|
MergeFinishRecord(vec1, vecBest[0]);
|
|
|
|
|
|
int exp = 1;
|
|
for (int w = 0; w < 3;)
|
|
{
|
|
vec2[0] = 0.0;
|
|
vec2[1] = -1.0;
|
|
vec2[2] = 0.0;
|
|
for (int i = 0; i < exp; i++)
|
|
for (int y = 0; y <= 2; y++)
|
|
vecBest[exp + i][y] = (0.01 * vec2[y]) + vecBest[i][y];
|
|
++w;
|
|
exp = 1 << w;
|
|
FilterRecords(vecBest, exp, records, recordCount);
|
|
}
|
|
|
|
// Write output
|
|
for (int z = 0; z < 8; z++)
|
|
{
|
|
double d;
|
|
d = -vecBest[z][1] * 2048.0;
|
|
if (d > 0.0)
|
|
coefsOut[z * 2] = (d > 32767.0) ? (short)32767 : (short)lround(d);
|
|
else
|
|
coefsOut[z * 2] = (d < -32768.0) ? (short)-32768 : (short)lround(d);
|
|
|
|
d = -vecBest[z][2] * 2048.0;
|
|
if (d > 0.0)
|
|
coefsOut[z * 2 + 1] = (d > 32767.0) ? (short)32767 : (short)lround(d);
|
|
else
|
|
coefsOut[z * 2 + 1] = (d < -32768.0) ? (short)-32768 : (short)lround(d);
|
|
}
|
|
|
|
// Free memory
|
|
free(records);
|
|
free(blockBuffer);
|
|
}
|
|
|
|
// Make sure source includes the yn values (16 samples total)
|
|
void DSPEncodeFrame(short pcmInOut[16], int sampleCount, unsigned char adpcmOut[8], const short coefsIn[8][2])
|
|
{
|
|
int inSamples[8][16];
|
|
int outSamples[8][14];
|
|
|
|
int bestIndex = 0;
|
|
|
|
int scale[8];
|
|
double distAccum[8];
|
|
|
|
// Iterate through each coef set, finding the set with the smallest error
|
|
for (int i = 0; i < 8; i++)
|
|
{
|
|
int v1, v2, v3;
|
|
int distance, index;
|
|
|
|
// Set yn values
|
|
inSamples[i][0] = pcmInOut[0];
|
|
inSamples[i][1] = pcmInOut[1];
|
|
|
|
// Round and clamp samples for this coef set
|
|
distance = 0;
|
|
for (int s = 0; s < sampleCount; s++)
|
|
{
|
|
// Multiply previous samples by coefs
|
|
inSamples[i][s + 2] = v1 = ((pcmInOut[s] * coefsIn[i][1]) + (pcmInOut[s + 1] * coefsIn[i][0])) / 2048;
|
|
// Subtract from current sample
|
|
v2 = pcmInOut[s + 2] - v1;
|
|
// Clamp
|
|
v3 = (v2 >= 32767) ? 32767 : (v2 <= -32768) ? -32768 : v2;
|
|
// Compare distance
|
|
if (abs(v3) > abs(distance))
|
|
distance = v3;
|
|
}
|
|
|
|
// Set initial scale
|
|
for (scale[i] = 0; (scale[i] <= 12) && ((distance > 7) || (distance < -8)); scale[i]++, distance /= 2)
|
|
{
|
|
}
|
|
scale[i] = (scale[i] <= 1) ? -1 : scale[i] - 2;
|
|
|
|
do
|
|
{
|
|
scale[i]++;
|
|
distAccum[i] = 0;
|
|
index = 0;
|
|
|
|
for (int s = 0; s < sampleCount; s++)
|
|
{
|
|
// Multiply previous
|
|
v1 = ((inSamples[i][s] * coefsIn[i][1]) + (inSamples[i][s + 1] * coefsIn[i][0]));
|
|
// Evaluate from real sample
|
|
v2 = (pcmInOut[s + 2] << 11) - v1;
|
|
// Round to nearest sample
|
|
v3 = (v2 > 0) ? (int)((double)v2 / (1 << scale[i]) / 2048 + 0.4999999f) : (int)((double)v2 / (1 << scale[i]) / 2048 - 0.4999999f);
|
|
|
|
// Clamp sample and set index
|
|
if (v3 < -8)
|
|
{
|
|
if (index < (v3 = -8 - v3))
|
|
index = v3;
|
|
v3 = -8;
|
|
}
|
|
else if (v3 > 7)
|
|
{
|
|
if (index < (v3 -= 7))
|
|
index = v3;
|
|
v3 = 7;
|
|
}
|
|
|
|
// Store result
|
|
outSamples[i][s] = v3;
|
|
|
|
// Round and expand
|
|
v1 = (v1 + ((v3 * (1 << scale[i])) << 11) + 1024) >> 11;
|
|
// Clamp and store
|
|
inSamples[i][s + 2] = v2 = (v1 >= 32767) ? 32767 : (v1 <= -32768) ? -32768 : v1;
|
|
// Accumulate distance
|
|
v3 = pcmInOut[s + 2] - v2;
|
|
distAccum[i] += v3 * (double)v3;
|
|
}
|
|
|
|
for (int x = index + 8; x > 256; x >>= 1)
|
|
if (++scale[i] >= 12)
|
|
scale[i] = 11;
|
|
} while ((scale[i] < 12) && (index > 1));
|
|
}
|
|
|
|
double min = DBL_MAX;
|
|
for (int i = 0; i < 8; i++)
|
|
{
|
|
if (distAccum[i] < min)
|
|
{
|
|
min = distAccum[i];
|
|
bestIndex = i;
|
|
}
|
|
}
|
|
|
|
// Write converted samples
|
|
for (int s = 0; s < sampleCount; s++)
|
|
pcmInOut[s + 2] = (short)inSamples[bestIndex][s + 2];
|
|
|
|
// Write ps
|
|
adpcmOut[0] = (char)((bestIndex << 4) | (scale[bestIndex] & 0xF));
|
|
|
|
// Zero remaining samples
|
|
for (int s = sampleCount; s < 14; s++)
|
|
outSamples[bestIndex][s] = 0;
|
|
|
|
// Write output samples
|
|
for (int y = 0; y < 7; y++)
|
|
adpcmOut[y + 1] = (char)((outSamples[bestIndex][y * 2] << 4) | (outSamples[bestIndex][y * 2 + 1] & 0xF));
|
|
}
|
|
|
|
void encodeFrame(int16_t* src, uint8_t* dst, int16_t* coefs, uint8_t one)
|
|
{
|
|
DSPEncodeFrame(src, 14, dst, (const short(*)[2])&coefs[0]);
|
|
}
|