I'm in the middle of making something, and I've now reached the audio part.
I originally planned to use SDL2_mixer, but in my experiments the distortion was severe, with a lot of electrical noise on top. Maybe I'm just holding it wrong…
In frustration I went to look at OpenAL, only to find the door closed (the site was under maintenance, with nothing left but the mailing list and the specification). I then turned to FMOD, but gave up on it after considering its licensing. In the end I came back to OpenAL, specifically OpenAL-Soft.
OpenAL's good side is being open source with a friendly license; the bad side… well, at least in this quick test, the example code isn't maintained even as well as SDL's. Compiling the .c example directly failed; it only built after I pulled the small trick of renaming it to .cpp.
The code below needs OpenAL-Soft (1.15.1) and FFmpeg.
Start from the alstream.c example that ships with OpenAL-Soft. For convenience, all the C source files from here on are turned into C++ source files… and don't forget to wrap the FFmpeg headers in extern "C"! (Why don't they take care of that themselves?)
OK, build the example and run it. (Setting up the various dependencies is not covered here.) Pick any file that contains audio and that FFmpeg can decode.
Something's off! You will very likely get the following error output:
Opened "OpenAL Soft"
AL_SOFT_buffer_samples supported!
Unsupported ffmpeg sample format: s16p
Error getting audio info for 01.mpg
Done.
What is going on here? In my tests SDL_mixer can play the same file, albeit with the distortion and noise mentioned above. Its sample format is S16P (signed 16-bit, planar). A look at the example's source shows that S16 (signed 16-bit) is supported. (Of course, if you brute-force those if statements to let the file through anyway, you will hear some truly magical things…) The difference between S16 and S16P is the data layout: the former stores the samples of all channels interleaved in one contiguous buffer, while the latter keeps each channel in its own plane. Quite a lot of audio these days is decoded in planar form, and not just S16: U8, S32, F32 and F64 all have planar counterparts. So the goal now is: make this example support planar formats.
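To make the difference concrete, here is how a stereo frame is laid out in the two formats (interleaved samples in a single plane vs. one plane per channel in AVFrame::data):

/* AV_SAMPLE_FMT_S16  (packed/interleaved): frame->data[0] = L0 R0 L1 R1 L2 R2 ...
 * AV_SAMPLE_FMT_S16P (planar):             frame->data[0] = L0 L1 L2 ...
 *                                          frame->data[1] = R0 R1 R2 ...
 */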
The idea is simple. My previous post has an AudioResampling() function, so let's just grab it and use it here. (Grab-and-use, in the spirit of Lu Xun's "take-ism". You're welcome, Mr. Lu Xun.)
Now for the good part.
One more test result first: playing U8/Mono audio crashes, and I don't know why. The debugger shows an out-of-bounds memory access.
First, add the includes for libswresample and libavutil (we need the opt_* functions), and don't forget to link the corresponding libraries:
#ifdef __cplusplus
extern "C" {
#endif
#include "libavutil/opt.h"
#include "libswresample/swresample.h"
#ifdef __cplusplus
}
#endif
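(For reference, and only as a sketch for a GCC/MinGW-style setup: on top of whatever the example already links, such as -lavformat -lavcodec -lopenal, the new code additionally needs -lswresample and -lavutil; with pkg-config, something like pkg-config --cflags --libs openal libavformat libavcodec libswresample libavutil covers all of them. Adjust the file names and flags for your own toolchain.)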
Then modify the definition of MyStream; FrameBuffer is the per-stream buffer that will hold the resampled data handed back to the caller:
struct MyStream {
AVCodecContext *CodecCtx;
int StreamIdx;
struct PacketList *Packets;
AVFrame *Frame;
// FrameData is no longer really used, but it is kept to preserve the code structure; its role is taken over by FrameBuffer
const uint8_t *FrameData;
uint8_t FrameBuffer[FRAME_BUFFER_SIZE]; // not const: the resampled data is written into it
size_t FrameDataSize;
FilePtr parent;
};
Define FRAME_BUFFER_SIZE first:
// A decoded MP3 frame is 4608 bytes, so setting this to 4096 (enough for most audio) would overflow the buffer and crash
#define FRAME_BUFFER_SIZE (4800)
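(To spell out the arithmetic: an MPEG-1 Layer III frame holds 1152 samples, so a stereo frame decoded to signed 16-bit is 1152 × 2 channels × 2 bytes = 4608 bytes, which is why 4096 is not enough. Codecs that produce larger frames or more channels would need an even bigger buffer, because, as we will see, AudioResampling() copies into this buffer without checking its size.)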
Now drop in the AudioResampling() function to add resampling support (if the grammatically odd tense in the name bothers you, just rename it):
static int AudioResampling(AVCodecContext * audio_dec_ctx,
AVFrame * pAudioDecodeFrame,
int out_sample_fmt,
int out_channels,
int out_sample_rate,
uint8_t* out_buf)
{
SwrContext * swr_ctx = NULL;
int data_size = 0;
int ret = 0;
int64_t src_ch_layout = audio_dec_ctx->channel_layout;
int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
int dst_nb_channels = 0;
int dst_linesize = 0;
int src_nb_samples = 0;
int dst_nb_samples = 0;
int max_dst_nb_samples = 0;
uint8_t **dst_data = NULL;
int resampled_data_size = 0;
swr_ctx = swr_alloc();
if (!swr_ctx)
{
printf("swr_alloc error \n");
return -1;
}
src_ch_layout = (audio_dec_ctx->channels ==
av_get_channel_layout_nb_channels(audio_dec_ctx->channel_layout)) ?
audio_dec_ctx->channel_layout :
av_get_default_channel_layout(audio_dec_ctx->channels);
if (out_channels == 1)
{
dst_ch_layout = AV_CH_LAYOUT_MONO;
//printf("dst_ch_layout: AV_CH_LAYOUT_MONO\n");
}
else if (out_channels == 2)
{
dst_ch_layout = AV_CH_LAYOUT_STEREO;
//printf("dst_ch_layout: AV_CH_LAYOUT_STEREO\n");
}
else
{
dst_ch_layout = AV_CH_LAYOUT_SURROUND;
//printf("dst_ch_layout: AV_CH_LAYOUT_SURROUND\n");
}
if (src_ch_layout <= 0)
{
printf("src_ch_layout error \n");
return -1;
}
src_nb_samples = pAudioDecodeFrame->nb_samples;
if (src_nb_samples <= 0)
{
printf("src_nb_samples error \n");
return -1;
}
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", audio_dec_ctx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", audio_dec_ctx->sample_fmt, 0);
av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", (AVSampleFormat)out_sample_fmt, 0);
if ((ret = swr_init(swr_ctx)) < 0) {
printf("Failed to initialize the resampling context\n");
return -1;
}
max_dst_nb_samples = dst_nb_samples = av_rescale_rnd(src_nb_samples,
out_sample_rate, audio_dec_ctx->sample_rate, AV_ROUND_UP);
if (max_dst_nb_samples <= 0)
{
printf("av_rescale_rnd error \n");
return -1;
}
dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, (AVSampleFormat)out_sample_fmt, 0);
if (ret < 0)
{
printf("av_samples_alloc_array_and_samples error \n");
return -1;
}
dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, audio_dec_ctx->sample_rate) +
src_nb_samples, out_sample_rate, audio_dec_ctx->sample_rate, AV_ROUND_UP);
if (dst_nb_samples <= 0)
{
printf("av_rescale_rnd error \n");
return -1;
}
if (dst_nb_samples > max_dst_nb_samples)
{
av_free(dst_data[0]);
ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, (AVSampleFormat)out_sample_fmt, 1);
max_dst_nb_samples = dst_nb_samples;
}
if (swr_ctx)
{
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples,
(const uint8_t **)pAudioDecodeFrame->data, pAudioDecodeFrame->nb_samples);
if (ret < 0)
{
printf("swr_convert error \n");
return -1;
}
resampled_data_size = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
ret, (AVSampleFormat)out_sample_fmt, 1);
if (resampled_data_size < 0)
{
printf("av_samples_get_buffer_size error \n");
return -1;
}
}
else
{
printf("swr_ctx null error \n");
return -1;
}
memcpy(out_buf, dst_data[0], resampled_data_size);
if (dst_data)
{
av_freep(&dst_data[0]);
}
av_freep(&dst_data);
dst_data = NULL;
if (swr_ctx)
{
swr_free(&swr_ctx);
}
return resampled_data_size;
}
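One detail worth flagging: the final memcpy() writes resampled_data_size bytes into out_buf without knowing how large the caller's buffer actually is, so a frame that resamples to more than FRAME_BUFFER_SIZE bytes will overrun FrameBuffer. A cheap guard is sketched below; it assumes an extra out_buf_size parameter is added to the function, which the code above does not have:

/* Sketch only: clamp the copy to the caller's buffer. "out_buf_size" is a
 * hypothetical extra size_t parameter, not part of the original function. */
if (resampled_data_size > (int)out_buf_size)
{
    printf("resampled frame (%d bytes) does not fit in the output buffer\n",
           resampled_data_size);
    resampled_data_size = (int)out_buf_size;
}
memcpy(out_buf, dst_data[0], resampled_data_size);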
Modify the getAVAudioData() function:
uint8_t *getAVAudioData(StreamPtr stream, size_t *length)
{
int got_frame;
int len;
if(length) *length = 0;
if(!stream || stream->CodecCtx->codec_type != AVMEDIA_TYPE_AUDIO)
return NULL;
next_packet:
if(!stream->Packets && !getNextPacket(stream->parent, stream->StreamIdx))
return NULL;
/* Decode some data, and check for errors */
avcodec_get_frame_defaults(stream->Frame);
while((len=avcodec_decode_audio4(stream->CodecCtx, stream->Frame,
&got_frame, &stream->Packets->pkt)) < 0)
{
struct PacketList *self;
/* Error? Drop it and try the next, I guess... */
self = stream->Packets;
stream->Packets = self->next;
av_free_packet(&self->pkt);
av_free(self);
if(!stream->Packets)
goto next_packet;
}
if(len < stream->Packets->pkt.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = stream->Packets->pkt.size - len;
memmove(stream->Packets->pkt.data, &stream->Packets->pkt.data[len],
remaining);
memset(&stream->Packets->pkt.data[remaining], 0,
stream->Packets->pkt.size - remaining);
stream->Packets->pkt.size -= len;
}
else
{
struct PacketList *self;
self = stream->Packets;
stream->Packets = self->next;
av_free_packet(&self->pkt);
av_free(self);
}
if(!got_frame || stream->Frame->nb_samples == 0)
goto next_packet;
// Insert the resampling code here
*length = AudioResampling(stream->CodecCtx, stream->Frame, AV_SAMPLE_FMT_S16,
stream->Frame->channels, stream->Frame->sample_rate, stream->FrameBuffer);
/* Set the output buffer size */
/*
*length = av_samples_get_buffer_size(NULL, stream->CodecCtx->channels,
stream->Frame->nb_samples,
stream->CodecCtx->sample_fmt, 1);
return stream->Frame->data[0];
*/
return stream->FrameBuffer;
}
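AudioResampling() returns -1 on failure, and storing that straight into the size_t that length points to would wrap around to a huge value. A slightly more defensive version of the call above (just a sketch, same arguments) would be:

/* Sketch: bail out cleanly if resampling fails instead of propagating -1 as a size_t. */
int resampled = AudioResampling(stream->CodecCtx, stream->Frame, AV_SAMPLE_FMT_S16,
                                stream->Frame->channels, stream->Frame->sample_rate,
                                stream->FrameBuffer);
if(resampled < 0)
    return NULL; /* treat a resampling error like end of stream */
*length = resampled;
return stream->FrameBuffer;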
Finally, the getAVAudioInfo() function, which we want to accept planar audio input as well:
int getAVAudioInfo(StreamPtr stream, ALuint *rate, ALenum *channels, ALenum *type)
{
if(!stream || stream->CodecCtx->codec_type != AVMEDIA_TYPE_AUDIO)
return 1;
/* Get the sample type for OpenAL given the format detected by ffmpeg. */
if(stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_U8 || stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_U8P)
*type = AL_UNSIGNED_BYTE_SOFT;
else if (stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_S16 || stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_S16P)
*type = AL_SHORT_SOFT;
else if(stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_S32 || stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_S32P)
*type = AL_INT_SOFT;
else if(stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_FLT || stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_FLTP)
*type = AL_FLOAT_SOFT;
else if(stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_DBL || stream->CodecCtx->sample_fmt == AV_SAMPLE_FMT_DBLP)
*type = AL_DOUBLE_SOFT;
else
{
fprintf(stderr, "Unsupported ffmpeg sample format: %s\n",
av_get_sample_fmt_name(stream->CodecCtx->sample_fmt));
return 1;
}
/* Get the OpenAL channel configuration using the channel layout detected
* by ffmpeg. NOTE: some file types may not specify a channel layout. In
* that case, one must be guessed based on the channel count. */
if(stream->CodecCtx->channel_layout == AV_CH_LAYOUT_MONO)
*channels = AL_MONO_SOFT;
else if(stream->CodecCtx->channel_layout == AV_CH_LAYOUT_STEREO)
*channels = AL_STEREO_SOFT;
else if(stream->CodecCtx->channel_layout == AV_CH_LAYOUT_QUAD)
*channels = AL_QUAD_SOFT;
else if(stream->CodecCtx->channel_layout == AV_CH_LAYOUT_5POINT1_BACK)
*channels = AL_5POINT1_SOFT;
else if(stream->CodecCtx->channel_layout == AV_CH_LAYOUT_7POINT1)
*channels = AL_7POINT1_SOFT;
else if(stream->CodecCtx->channel_layout == 0)
{
/* Unknown channel layout. Try to guess. */
if(stream->CodecCtx->channels == 1)
*channels = AL_MONO_SOFT;
else if(stream->CodecCtx->channels == 2)
*channels = AL_STEREO_SOFT;
else
{
fprintf(stderr, "Unsupported ffmpeg raw channel count: %d\n",
stream->CodecCtx->channels);
return 1;
}
}
else
{
char str[1024];
av_get_channel_layout_string(str, sizeof(str), stream->CodecCtx->channels,
stream->CodecCtx->channel_layout);
fprintf(stderr, "Unsupported ffmpeg channel layout: %s\n", str);
return 1;
}
*rate = stream->CodecCtx->sample_rate;
return 0;
}
And that's basically it. When you play a file now, the planar part no longer shows up in the printed format info, because the display goes through GetFormat() in alhelpers.cpp, which reports things in OpenAL's terms.
But that doesn't affect playback.
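One caveat to finish with: the AudioResampling() call in getAVAudioData() hardcodes AV_SAMPLE_FMT_S16 as the output format, while getAVAudioInfo() still reports the codec's native sample type to OpenAL. For S16/S16P input the two agree, but for U8, S32, float or double input OpenAL would be told one sample type and fed another, which might even be connected to the U8/Mono crash mentioned earlier, though I have not verified that. A minimal way to keep the two consistent (a sketch, untested) is to resample to the packed counterpart of the native format instead, using av_get_packed_sample_fmt() from libavutil:

/* Sketch: convert planar input to its packed counterpart rather than forcing S16,
 * so the delivered data keeps matching what getAVAudioInfo() reports. */
*length = AudioResampling(stream->CodecCtx, stream->Frame,
                          av_get_packed_sample_fmt(stream->CodecCtx->sample_fmt),
                          stream->Frame->channels, stream->Frame->sample_rate,
                          stream->FrameBuffer);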