Files
ANSCORE/MediaClient/media/audio_encoder.cpp

578 lines
14 KiB
C++
Raw Permalink Normal View History

2026-03-28 16:54:11 +11:00
/***************************************************************************************
*
* IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
* Copyright (C) 2014-2024, Happytimesoft Corporation, all rights reserved.
*
* Redistribution and use in binary forms, with or without modification, are permitted.
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
* language governing permissions and limitations under the License.
*
****************************************************************************************/
#include "sys_inc.h"
#include "audio_encoder.h"
#include "media_format.h"
#include "avcodec_mutex.h"
#include "media_codec.h"
/**
 * Construct an encoder in the "not initialised" state.
 *
 * All FFmpeg handles start out as NULL and the parameter / staging-buffer
 * structs are zero-filled; init() has to be called before encode().
 * The callback list and the mutex protecting it are created here.
 */
CAudioEncoder::CAudioEncoder()
{
    // zero-fill the aggregate members
    memset(&m_AudioBuffer, 0, sizeof(AudioBuffer));
    memset(&m_EncoderParams, 0, sizeof(AudioEncoderParam));

    // encoder state
    m_bInited  = FALSE;
    m_nCodecId = AV_CODEC_ID_NONE;

    // FFmpeg objects, created in init() / encode()
    m_pPkt           = NULL;
    m_pSwrCtx        = NULL;
    m_pResampleFrame = NULL;
    m_pFrame         = NULL;
    m_pCodecCtx      = NULL;

    // callback registry and its lock
    m_pCallbackList  = h_list_create(FALSE);
    m_pCallbackMutex = sys_os_create_mutex();
}
/**
 * Tear the encoder down.
 *
 * uninit() runs first (it still uses the callback list and mutex while
 * flushing), then the callback list and finally its mutex are destroyed.
 */
CAudioEncoder::~CAudioEncoder()
{
    // release the encoder side first
    this->uninit();

    // then the callback registry ...
    h_list_free_container(this->m_pCallbackList);

    // ... and the lock guarding it
    sys_os_destroy_sig_mutex(this->m_pCallbackMutex);
}
/**
 * Pick a default bitrate for an encoder when the caller did not specify one.
 *
 * The decision is based on the codec argument (previously the member
 * m_nCodecId was consulted and the argument was silently ignored).
 *
 * @param codec      codec the bitrate is chosen for
 * @param samplerate sample rate of the encoded stream
 * @param channels   channel count of the encoded stream
 * @param quality    not used by the current implementation
 * @return the bitrate that init() stores in AVCodecContext::bit_rate
 */
int CAudioEncoder::computeBitrate(AVCodecID codec, int samplerate, int channels, int quality)
{
    (void)quality; // kept for interface compatibility only

    switch (codec)
    {
    case AV_CODEC_ID_ADPCM_G726:
        return 16000;                           // G726 16kbit/s

    case AV_CODEC_ID_ADPCM_G722:
        return 64000;                           // G722 64kbit/s

    case AV_CODEC_ID_PCM_ALAW:
    case AV_CODEC_ID_PCM_MULAW:
        return samplerate * channels * 8;       // 8 bits per sample

    case AV_CODEC_ID_AAC:
        return samplerate * channels * 16 / 7;

    default:
        return samplerate * channels;
    }
}
/**
 * Configure and open the audio encoder described by params.
 *
 * Steps: copy the parameters, look up and configure the FFmpeg encoder,
 * allocate the work packet and frame, open the codec, create a resampler
 * when source and destination formats differ, and allocate the two staging
 * buffers used by bufferFrame().
 *
 * The packet is allocated before the codec is opened so that an open codec
 * context never exists without a packet to drain into (uninit() flushes the
 * encoder). Failures before or during the open release everything this call
 * created; failures after the open leave the created objects in place for
 * uninit(), which the destructor calls.
 *
 * This function overwrites the existing handles without releasing them, so
 * uninit() must be called before init() is called a second time.
 *
 * @param params encoder parameters; copied into m_EncoderParams
 * @return TRUE when the encoder is ready for encode(), FALSE otherwise
 */
BOOL CAudioEncoder::init(AudioEncoderParam * params)
{
    if (NULL == params)
    {
        log_print(HT_LOG_ERR, "%s, params is null\r\n", __FUNCTION__);
        return FALSE;
    }

    memcpy(&m_EncoderParams, params, sizeof(AudioEncoderParam));

    m_nCodecId = to_audio_avcodecid(params->DstCodec);

    if (AV_CODEC_ID_AAC == m_nCodecId)
    {
        // the ffmpeg AAC encoder only supports AV_SAMPLE_FMT_FLTP
        m_EncoderParams.DstSamplefmt = AV_SAMPLE_FMT_FLTP;
    }

    const AVCodec * pCodec = avcodec_find_encoder(m_nCodecId);
    if (pCodec == NULL)
    {
        log_print(HT_LOG_ERR, "avcodec_find_encoder failed, %d\r\n", m_nCodecId);
        return FALSE;
    }

    m_pCodecCtx = avcodec_alloc_context3(pCodec);
    if (m_pCodecCtx == NULL)
    {
        log_print(HT_LOG_ERR, "avcodec_alloc_context3 failed\r\n");
        return FALSE;
    }

    m_pCodecCtx->codec_id = m_nCodecId;
    m_pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
    m_pCodecCtx->qblur = 0.5f;
    m_pCodecCtx->time_base.num = 1;
    m_pCodecCtx->time_base.den = m_EncoderParams.DstSamplerate;
    m_pCodecCtx->sample_rate = m_EncoderParams.DstSamplerate;
    m_pCodecCtx->channels = m_EncoderParams.DstChannels;
    m_pCodecCtx->channel_layout = av_get_default_channel_layout(m_EncoderParams.DstChannels);
    m_pCodecCtx->sample_fmt = m_EncoderParams.DstSamplefmt;

    if (m_EncoderParams.DstBitrate > 0)
    {
        // DstBitrate is multiplied by 1000 before being handed to FFmpeg
        m_pCodecCtx->bit_rate = m_EncoderParams.DstBitrate * 1000;
    }
    else
    {
        m_pCodecCtx->bit_rate = computeBitrate(m_nCodecId, m_EncoderParams.DstSamplerate, m_EncoderParams.DstChannels, 80);
    }

    m_pCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    av_opt_set(m_pCodecCtx->priv_data, "preset", "superfast", 0);
    av_opt_set(m_pCodecCtx->priv_data, "tune", "zerolatency", 0);

    if (AV_CODEC_ID_AAC == m_nCodecId)
    {
        m_pCodecCtx->profile = FF_PROFILE_AAC_LOW;  // AAC -LC
    }
    else if (AV_CODEC_ID_ADPCM_G726 == m_nCodecId)
    {
        // round the bitrate to a whole number of bits per sample at 8000 samples/s
        m_pCodecCtx->bits_per_coded_sample = m_pCodecCtx->bit_rate / 8000;
        m_pCodecCtx->bit_rate = m_pCodecCtx->bits_per_coded_sample * 8000;
    }

    /* packet for holding encoded output; allocated before the codec is opened */
    m_pPkt = av_packet_alloc();
    if (!m_pPkt)
    {
        log_print(HT_LOG_ERR, "could not allocate the packet\r\n");
        avcodec_free_context(&m_pCodecCtx);
        return FALSE;
    }

    m_pFrame = av_frame_alloc();
    if (NULL == m_pFrame)
    {
        log_print(HT_LOG_ERR, "av_frame_alloc failed\r\n");
        av_packet_free(&m_pPkt);
        avcodec_free_context(&m_pCodecCtx);
        return FALSE;
    }

    if (avcodec_thread_open(m_pCodecCtx, pCodec, NULL) < 0)
    {
        log_print(HT_LOG_ERR, "avcodec_thread_open failed, audio encoder\r\n");
        av_frame_free(&m_pFrame);
        av_packet_free(&m_pPkt);
        avcodec_free_context(&m_pCodecCtx);
        return FALSE;
    }

    // a resampler is only needed when input and output formats differ
    if (m_EncoderParams.SrcSamplerate != m_EncoderParams.DstSamplerate ||
        m_EncoderParams.SrcChannels != m_EncoderParams.DstChannels ||
        m_EncoderParams.SrcSamplefmt != m_EncoderParams.DstSamplefmt)
    {
        m_pSwrCtx = swr_alloc_set_opts(NULL,
            av_get_default_channel_layout(m_EncoderParams.DstChannels), m_EncoderParams.DstSamplefmt, m_EncoderParams.DstSamplerate,
            av_get_default_channel_layout(m_EncoderParams.SrcChannels), m_EncoderParams.SrcSamplefmt, m_EncoderParams.SrcSamplerate, 0, NULL);
        if (NULL == m_pSwrCtx)
        {
            log_print(HT_LOG_ERR, "swr_alloc_set_opts failed\r\n");
            return FALSE;
        }

        if (swr_init(m_pSwrCtx) < 0)
        {
            log_print(HT_LOG_ERR, "swr_init failed\r\n");
            swr_free(&m_pSwrCtx);
            return FALSE;
        }
    }

    // bufferFrame() slices the staging buffer in units of frame_size samples,
    // so a codec that reports 0 gets a fixed chunk size
    if (m_pCodecCtx->frame_size == 0)
    {
        m_pCodecCtx->frame_size = 1024;
    }

    // room for 64 encoder frames per staging buffer
    m_AudioBuffer.tlen = 64 * m_pCodecCtx->frame_size * m_EncoderParams.DstChannels * av_get_bytes_per_sample(m_EncoderParams.DstSamplefmt);
    m_AudioBuffer.data[0] = (uint8 *)av_malloc(m_AudioBuffer.tlen);
    m_AudioBuffer.data[1] = (uint8 *)av_malloc(m_AudioBuffer.tlen);
    m_AudioBuffer.size[0] = 0;
    m_AudioBuffer.size[1] = 0;
    m_AudioBuffer.samples = 0;

    if (NULL == m_AudioBuffer.data[0] || NULL == m_AudioBuffer.data[1])
    {
        log_print(HT_LOG_ERR, "could not allocate the audio buffer\r\n");
        av_freep(&m_AudioBuffer.data[0]);
        av_freep(&m_AudioBuffer.data[1]);
        return FALSE;
    }

    m_bInited = TRUE;

    return TRUE;
}
void CAudioEncoder::uninit()
{
flush();
if (m_pCodecCtx)
{
avcodec_thread_close(m_pCodecCtx);
avcodec_free_context(&m_pCodecCtx);
}
if (m_pFrame)
{
av_frame_free(&m_pFrame);
}
if (m_pResampleFrame)
{
av_frame_free(&m_pResampleFrame);
}
if (m_pSwrCtx)
{
swr_free(&m_pSwrCtx);
}
if (m_pPkt)
{
av_packet_free(&m_pPkt);
}
if (m_AudioBuffer.data[0])
{
av_freep(&m_AudioBuffer.data[0]);
}
if (m_AudioBuffer.data[1])
{
av_freep(&m_AudioBuffer.data[1]);
}
m_bInited = FALSE;
}
/**
 * Append the samples of pFrame to the staging buffer and encode every
 * complete chunk of m_pCodecCtx->frame_size samples that is available.
 *
 * Planar input with more than one destination channel is stored in two
 * separate planes (data[0] / data[1]); everything else is stored
 * interleaved in data[0].
 * NOTE(review): only the first two planes are handled, so planar audio with
 * more than two channels is presumably not supported - confirm with callers.
 *
 * The capacity check is done at run time (it used to be an assert, which is
 * compiled out in release builds and did not account for the channel count
 * of interleaved data). A frame that does not fit is rejected.
 *
 * @param pFrame frame in the encoder's destination format; it may be
 *               m_pFrame itself, so its fields are read before m_pFrame
 *               is modified
 * @return FALSE when the frame does not fit or when encoding of any chunk
 *         failed, TRUE otherwise
 */
BOOL CAudioEncoder::bufferFrame(AVFrame * pFrame)
{
    BOOL ret = TRUE;

    const AVSampleFormat fmt = (AVSampleFormat)pFrame->format;
    const int samplesize = av_get_bytes_per_sample(fmt);
    const int nbSamples = pFrame->nb_samples;
    const bool twoPlanes = av_sample_fmt_is_planar(fmt) && m_EncoderParams.DstChannels > 1;

    // bytes appended to each plane that is in use
    const int appendBytes = twoPlanes ? (nbSamples * samplesize)
                                      : (nbSamples * samplesize * m_EncoderParams.DstChannels);

    if (m_AudioBuffer.size[0] + appendBytes > m_AudioBuffer.tlen ||
        (twoPlanes && m_AudioBuffer.size[1] + appendBytes > m_AudioBuffer.tlen))
    {
        log_print(HT_LOG_ERR, "%s, audio buffer overflow, frame dropped\r\n", __FUNCTION__);
        return FALSE;
    }

    memcpy(m_AudioBuffer.data[0] + m_AudioBuffer.size[0], pFrame->data[0], appendBytes);
    m_AudioBuffer.size[0] += appendBytes;

    if (twoPlanes)
    {
        memcpy(m_AudioBuffer.data[1] + m_AudioBuffer.size[1], pFrame->data[1], appendBytes);
        m_AudioBuffer.size[1] += appendBytes;
    }

    m_AudioBuffer.samples += nbSamples;

    // bytes consumed from each plane per encoded chunk (loop invariant)
    const int linesize = twoPlanes ? (samplesize * m_pCodecCtx->frame_size)
                                   : (samplesize * m_pCodecCtx->frame_size * m_EncoderParams.DstChannels);

    while (m_AudioBuffer.samples >= m_pCodecCtx->frame_size)
    {
        // point the work frame at the head of the staging buffer
        m_pFrame->data[0] = m_AudioBuffer.data[0];
        m_pFrame->data[1] = m_AudioBuffer.data[1];
        m_pFrame->linesize[0] = linesize;
        m_pFrame->linesize[1] = linesize;
        m_pFrame->nb_samples = m_pCodecCtx->frame_size;
        m_pFrame->format = m_EncoderParams.DstSamplefmt;
        m_pFrame->key_frame = 1;
        m_pFrame->sample_rate = m_EncoderParams.DstSamplerate;
        m_pFrame->channels = m_EncoderParams.DstChannels;
        m_pFrame->channel_layout = av_get_default_channel_layout(m_EncoderParams.DstChannels);

        // remember a failure instead of letting a later chunk overwrite it
        if (!encodeInternal(m_pFrame))
        {
            ret = FALSE;
        }

        // drop the consumed chunk and move the remainder to the front
        m_AudioBuffer.size[0] -= linesize;
        if (m_AudioBuffer.size[0] > 0)
        {
            memmove(m_AudioBuffer.data[0], m_AudioBuffer.data[0] + linesize, m_AudioBuffer.size[0]);
        }

        if (twoPlanes)
        {
            m_AudioBuffer.size[1] -= linesize;
            if (m_AudioBuffer.size[1] > 0)
            {
                memmove(m_AudioBuffer.data[1], m_AudioBuffer.data[1] + linesize, m_AudioBuffer.size[1]);
            }
        }

        m_AudioBuffer.samples -= m_pCodecCtx->frame_size;
    }

    return ret;
}
void CAudioEncoder::flush()
{
if (NULL == m_pCodecCtx ||
NULL == m_pCodecCtx->codec ||
!(m_pCodecCtx->codec->capabilities | AV_CODEC_CAP_DELAY))
{
return;
}
encodeInternal(NULL);
}
/**
 * Feed one frame to the encoder and hand every packet it produces to
 * procData().
 *
 * @param pFrame frame to encode, or NULL to drain the encoder; for a NULL
 *               frame the sample count passed to procData() is 0
 * @return TRUE once the encoder has no more output for now (EAGAIN) or is
 *         fully drained (EOF); FALSE when sending or receiving failed
 */
BOOL CAudioEncoder::encodeInternal(AVFrame * pFrame)
{
    /* send the frame for encoding */
    if (avcodec_send_frame(m_pCodecCtx, pFrame) < 0)
    {
        log_print(HT_LOG_ERR, "error sending the frame to the encoder\r\n");
        return FALSE;
    }

    /* read all the available output packets (in general there may be any
     * number of them) */
    for (;;)
    {
        const int rc = avcodec_receive_packet(m_pCodecCtx, m_pPkt);

        if (AVERROR(EAGAIN) == rc || AVERROR_EOF == rc)
        {
            return TRUE;
        }

        if (rc < 0)
        {
            log_print(HT_LOG_ERR, "error encoding audio frame\r\n");
            return FALSE;
        }

        const bool hasPayload = (m_pPkt->data != NULL) && (m_pPkt->size > 0);

        if (!hasPayload)
        {
            log_print(HT_LOG_WARN, "%s, data is null\r\n", __FUNCTION__);
        }
        else
        {
            int nbsamples = 0;
            if (pFrame)
            {
                nbsamples = pFrame->nb_samples;
            }

            procData(m_pPkt->data, m_pPkt->size, nbsamples);
        }

        av_packet_unref(m_pPkt);
    }
}
/**
 * Encode a raw sample buffer that is in the source format given to init().
 *
 * The buffer is wrapped (not copied) in m_pFrame and passed on to
 * encode(AVFrame *).
 *
 * @param data raw audio samples
 * @param size length of data in bytes
 * @return FALSE when the encoder is not initialised, otherwise the result
 *         of encode(AVFrame *)
 */
BOOL CAudioEncoder::encode(uint8 * data, int size)
{
    if (!m_bInited)
    {
        return FALSE;
    }

    // bytes occupied by one sample across all source channels
    const int bytesPerSample = m_EncoderParams.SrcChannels * av_get_bytes_per_sample(m_EncoderParams.SrcSamplefmt);

    // describe the caller's buffer with the source parameters
    m_pFrame->format         = m_EncoderParams.SrcSamplefmt;
    m_pFrame->sample_rate    = m_EncoderParams.SrcSamplerate;
    m_pFrame->channels       = m_EncoderParams.SrcChannels;
    m_pFrame->channel_layout = av_get_default_channel_layout(m_EncoderParams.SrcChannels);
    m_pFrame->key_frame      = 1;

    m_pFrame->data[0]     = data;
    m_pFrame->linesize[0] = size;
    m_pFrame->nb_samples  = size / bytesPerSample;

    return encode(m_pFrame);
}
/**
 * Encode one audio frame.
 *
 * Without a resampler the frame goes straight to bufferFrame(). With a
 * resampler it is first converted into m_pResampleFrame (allocated on first
 * use) using the destination parameters, buffered, and the converted data
 * is released again.
 *
 * @param pFrame input frame
 * @return FALSE when the encoder is not initialised, the resample frame
 *         cannot be allocated or the conversion fails; otherwise the
 *         result of bufferFrame()
 */
BOOL CAudioEncoder::encode(AVFrame * pFrame)
{
    if (!m_bInited)
    {
        return FALSE;
    }

    // no conversion needed: buffer the caller's frame directly
    if (NULL == m_pSwrCtx)
    {
        return bufferFrame(pFrame);
    }

    if (NULL == m_pResampleFrame)
    {
        m_pResampleFrame = av_frame_alloc();
        if (NULL == m_pResampleFrame)
        {
            return FALSE;
        }
    }

    // describe the wanted output of the conversion
    m_pResampleFrame->channel_layout = av_get_default_channel_layout(m_EncoderParams.DstChannels);
    m_pResampleFrame->channels       = m_EncoderParams.DstChannels;
    m_pResampleFrame->format         = m_EncoderParams.DstSamplefmt;
    m_pResampleFrame->sample_rate    = m_EncoderParams.DstSamplerate;

    BOOL ok = FALSE;

    if (0 == swr_convert_frame(m_pSwrCtx, m_pResampleFrame, pFrame))
    {
        ok = bufferFrame(m_pResampleFrame);
    }

    av_frame_unref(m_pResampleFrame);

    return ok;
}
/**
 * Deliver one encoded packet to every registered callback.
 *
 * The callback list is walked while m_pCallbackMutex is held; entries
 * without a callback function are skipped.
 *
 * @param data      encoded data
 * @param size      length of data in bytes
 * @param nbsamples sample count forwarded unchanged to the callbacks
 */
void CAudioEncoder::procData(uint8 * data, int size, int nbsamples)
{
    sys_os_mutex_enter(m_pCallbackMutex);

    for (LINKED_NODE * p_cur = h_list_lookup_start(m_pCallbackList);
         p_cur != NULL;
         p_cur = h_list_lookup_next(m_pCallbackList, p_cur))
    {
        AudioEncoderCB * p_entry = (AudioEncoderCB *) p_cur->p_data;

        if (NULL == p_entry->pCallback)
        {
            continue;
        }

        p_entry->pCallback(data, size, nbsamples, p_entry->pUserdata);
    }

    h_list_lookup_end(m_pCallbackList);

    sys_os_mutex_leave(m_pCallbackMutex);
}
/**
 * Check whether a (callback, user data) pair is already registered.
 *
 * The list is searched under m_pCallbackMutex.
 *
 * @param pCallback callback function to look for
 * @param pUserdata user data that was registered with the callback
 * @return TRUE when an entry with both values exists, FALSE otherwise
 */
BOOL CAudioEncoder::isCallbackExist(AudioDataCallback pCallback, void *pUserdata)
{
    BOOL found = FALSE;

    sys_os_mutex_enter(m_pCallbackMutex);

    for (LINKED_NODE * p_cur = h_list_lookup_start(m_pCallbackList);
         p_cur != NULL;
         p_cur = h_list_lookup_next(m_pCallbackList, p_cur))
    {
        const AudioEncoderCB * p_entry = (AudioEncoderCB *) p_cur->p_data;

        if (p_entry->pCallback == pCallback && p_entry->pUserdata == pUserdata)
        {
            found = TRUE;
            break;
        }
    }

    h_list_lookup_end(m_pCallbackList);

    sys_os_mutex_leave(m_pCallbackMutex);

    return found;
}
/**
 * Register a callback that receives the encoded packets.
 *
 * A pair that is already registered is not added again. The new entry is
 * allocated with malloc() and appended to the callback list under
 * m_pCallbackMutex; if the allocation fails nothing is registered.
 *
 * @param pCallback function called from procData()
 * @param pUserdata opaque pointer passed back to pCallback
 */
void CAudioEncoder::addCallback(AudioDataCallback pCallback, void *pUserdata)
{
    const BOOL alreadyThere = isCallbackExist(pCallback, pUserdata);
    if (alreadyThere)
    {
        return;
    }

    AudioEncoderCB * p_entry = (AudioEncoderCB *) malloc(sizeof(AudioEncoderCB));
    if (p_entry != NULL)
    {
        p_entry->bFirst    = TRUE;
        p_entry->pUserdata = pUserdata;
        p_entry->pCallback = pCallback;

        sys_os_mutex_enter(m_pCallbackMutex);
        h_list_add_at_back(m_pCallbackList, p_entry);
        sys_os_mutex_leave(m_pCallbackMutex);
    }
}
/**
 * Remove a previously registered (callback, user data) pair.
 *
 * The first matching entry is freed and its node is removed from the list;
 * the search runs under m_pCallbackMutex. Nothing happens when the pair is
 * not registered.
 *
 * @param pCallback callback function given to addCallback()
 * @param pUserdata user data given to addCallback()
 */
void CAudioEncoder::delCallback(AudioDataCallback pCallback, void *pUserdata)
{
    sys_os_mutex_enter(m_pCallbackMutex);

    for (LINKED_NODE * p_cur = h_list_lookup_start(m_pCallbackList);
         p_cur != NULL;
         p_cur = h_list_lookup_next(m_pCallbackList, p_cur))
    {
        AudioEncoderCB * p_entry = (AudioEncoderCB *) p_cur->p_data;

        const bool matches = (p_entry->pCallback == pCallback) && (p_entry->pUserdata == pUserdata);
        if (!matches)
        {
            continue;
        }

        // release the entry, then unlink its node; stop at the first match
        free(p_entry);
        h_list_remove(m_pCallbackList, p_cur);
        break;
    }

    h_list_lookup_end(m_pCallbackList);

    sys_os_mutex_leave(m_pCallbackMutex);
}
/**
 * Build the "a=fmtp:" SDP attribute for the AAC stream.
 *
 * The line consists of a fixed parameter list followed by "config=" and the
 * codec extradata printed as upper-case hex, two characters per byte.
 *
 * The buffer is sized for any int payload type and the prefix is written
 * with snprintf(), so an out-of-range rtp_pt can no longer overrun it.
 *
 * @param rtp_pt RTP payload type inserted after "a=fmtp:"
 * @return a NUL-terminated string allocated with new[] (the caller is
 *         expected to release it with delete[]), or NULL when there is no
 *         codec context / extradata or formatting fails
 */
char * CAudioEncoder::getAACAuxSDPLine(int rtp_pt)
{
    if (NULL == m_pCodecCtx ||
        NULL == m_pCodecCtx->extradata ||
        m_pCodecCtx->extradata_size <= 0)
    {
        return NULL;
    }

    char const* fmtpFmt =
        "a=fmtp:%d "
        "streamtype=5;profile-level-id=1;"
        "mode=AAC-hbr;sizelength=13;indexlength=3;indexdeltalength=3;"
        "config=";

    const size_t extraLen = (size_t)m_pCodecCtx->extradata_size;

    const size_t bufSize = strlen(fmtpFmt)
        + 11                /* longest textual int, including the sign */
        + 2 * extraLen      /* 2*, because each byte prints as 2 chars */
        + 1;                /* terminating NUL */

    char* fmtp = new char[bufSize];
    memset(fmtp, 0, bufSize);

    const int prefixLen = snprintf(fmtp, bufSize, fmtpFmt, rtp_pt);
    if (prefixLen < 0 || (size_t)prefixLen + 2 * extraLen >= bufSize)
    {
        delete[] fmtp;
        return NULL;
    }

    static const char hexDigits[] = "0123456789ABCDEF";

    char* endPtr = fmtp + prefixLen;

    for (size_t i = 0; i < extraLen; ++i)
    {
        const unsigned char byte = m_pCodecCtx->extradata[i];

        *endPtr++ = hexDigits[byte >> 4];
        *endPtr++ = hexDigits[byte & 0x0F];
    }

    *endPtr = '\0';

    return fmtp;
}
/**
 * Return the codec specific SDP attribute line, if the codec has one.
 *
 * Only AAC produces a line; for every other codec NULL is returned.
 *
 * @param rtp_pt RTP payload type forwarded to getAACAuxSDPLine()
 * @return the result of getAACAuxSDPLine() for AAC, NULL otherwise
 */
char * CAudioEncoder::getAuxSDPLine(int rtp_pt)
{
    if (m_nCodecId != AV_CODEC_ID_AAC)
    {
        return NULL;
    }

    return getAACAuxSDPLine(rtp_pt);
}
/**
 * Expose the encoder's extradata.
 *
 * The returned pointer refers to the buffer inside the codec context; no
 * copy is made. The output arguments are left untouched on failure.
 *
 * @param extradata receives the pointer to the extradata
 * @param extralen  receives the extradata length in bytes
 * @return TRUE when a codec context with non-empty extradata exists,
 *         FALSE otherwise
 */
BOOL CAudioEncoder::getExtraData(uint8 ** extradata, int * extralen)
{
    const bool available = (m_pCodecCtx != NULL) && (m_pCodecCtx->extradata_size > 0);

    if (!available)
    {
        return FALSE;
    }

    *extralen  = m_pCodecCtx->extradata_size;
    *extradata = m_pCodecCtx->extradata;

    return TRUE;
}