libavformat/xmv.c
Go to the documentation of this file.
00001 /*
00002  * Microsoft XMV demuxer
00003  * Copyright (c) 2011 Sven Hesse <drmccoy@drmccoy.de>
00004  * Copyright (c) 2011 Matthew Hoops <clone2727@gmail.com>
00005  *
00006  * This file is part of Libav.
00007  *
00008  * Libav is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * Libav is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with Libav; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 #include <stdint.h>
00029 
00030 #include "libavutil/intreadwrite.h"
00031 
00032 #include "avformat.h"
00033 #include "internal.h"
00034 #include "riff.h"
00035 
/* Minimum number of probe bytes needed before the header fields
 * inspected by xmv_probe() can be read. */
#define XMV_MIN_HEADER_SIZE 36

/* Audio track flag bits marking the partial streams that together make up
 * one 5.1 ADPCM track. */
#define XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT (1 << 0)
#define XMV_AUDIO_ADPCM51_FRONTCENTERLOW (1 << 1)
#define XMV_AUDIO_ADPCM51_REARLEFTRIGHT  (1 << 2)

/* Mask matching any of the 5.1 ADPCM flag bits. */
#define XMV_AUDIO_ADPCM51 (XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT | \
                           XMV_AUDIO_ADPCM51_FRONTCENTERLOW | \
                           XMV_AUDIO_ADPCM51_REARLEFTRIGHT)

/* Size in bytes of one audio block for a single channel; the per-track
 * block alignment is this value times the channel count. */
#define XMV_BLOCK_ALIGN_SIZE 36
00047 
00048 typedef struct XMVAudioTrack {
00049     uint16_t compression;
00050     uint16_t channels;
00051     uint32_t sample_rate;
00052     uint16_t bits_per_sample;
00053     uint32_t bit_rate;
00054     uint16_t flags;
00055     uint16_t block_align;
00056     uint16_t block_samples;
00057 
00058     enum CodecID codec_id;
00059 } XMVAudioTrack;
00060 
/* State of the video data inside the packet currently being demuxed. */
typedef struct XMVVideoPacket {
    /* Index of the video stream in the format context. */
    int stream_index;

    /* Bytes of video data left in the packet and where they start. */
    uint32_t data_size;
    uint32_t data_offset;

    /* Number of the frame being processed and total frames in the packet. */
    uint32_t current_frame;
    uint32_t frame_count;

    /* Non-zero when the packet starts with new extra data. */
    int has_extradata;

    /* Most recently read extra data, already converted to WMV2 layout. */
    uint8_t extradata[4];

    /* Timestamp of the last emitted frame and the running timestamp sum. */
    int64_t last_pts;
    int64_t pts;
} XMVVideoPacket;
00080 
00081 typedef struct XMVAudioPacket {
00082     /* The decoder stream index for this audio packet. */
00083     int stream_index;
00084 
00085     /* The audio track this packet encodes. */
00086     XMVAudioTrack *track;
00087 
00088     uint32_t data_size;
00089     uint32_t data_offset;
00090 
00091     uint32_t frame_size;
00092 
00093     uint32_t block_count;
00094 } XMVAudioPacket;
00095 
00096 typedef struct XMVDemuxContext {
00097     uint16_t audio_track_count;
00098 
00099     XMVAudioTrack *audio_tracks;
00100 
00101     uint32_t this_packet_size;
00102     uint32_t next_packet_size;
00103 
00104     uint32_t this_packet_offset;
00105     uint32_t next_packet_offset;
00106 
00107     uint16_t current_stream;
00108     uint16_t stream_count;
00109 
00110     XMVVideoPacket  video;
00111     XMVAudioPacket *audio;
00112 } XMVDemuxContext;
00113 
00114 static int xmv_probe(AVProbeData *p)
00115 {
00116     uint32_t file_version;
00117 
00118     if (p->buf_size < XMV_MIN_HEADER_SIZE)
00119         return 0;
00120 
00121     file_version = AV_RL32(p->buf + 16);
00122     if ((file_version == 0) || (file_version > 4))
00123         return 0;
00124 
00125     if (!memcmp(p->buf + 12, "xobX", 4))
00126         return AVPROBE_SCORE_MAX;
00127 
00128     return 0;
00129 }
00130 
00131 static int xmv_read_close(AVFormatContext *s)
00132 {
00133     XMVDemuxContext *xmv = s->priv_data;
00134 
00135     av_free(xmv->audio);
00136     av_free(xmv->audio_tracks);
00137 
00138     return 0;
00139 }
00140 
00141 static int xmv_read_header(AVFormatContext *s,
00142                            AVFormatParameters *ap)
00143 {
00144     XMVDemuxContext *xmv = s->priv_data;
00145     AVIOContext     *pb  = s->pb;
00146     AVStream        *vst = NULL;
00147 
00148     uint32_t file_version;
00149     uint32_t this_packet_size;
00150     uint16_t audio_track;
00151     int ret;
00152 
00153     avio_skip(pb, 4); /* Next packet size */
00154 
00155     this_packet_size = avio_rl32(pb);
00156 
00157     avio_skip(pb, 4); /* Max packet size */
00158     avio_skip(pb, 4); /* "xobX" */
00159 
00160     file_version = avio_rl32(pb);
00161     if ((file_version != 4) && (file_version != 2))
00162         av_log_ask_for_sample(s, "Found uncommon version %d\n", file_version);
00163 
00164 
00165     /* Video track */
00166 
00167     vst = avformat_new_stream(s, NULL);
00168     if (!vst)
00169         return AVERROR(ENOMEM);
00170 
00171     avpriv_set_pts_info(vst, 32, 1, 1000);
00172 
00173     vst->codec->codec_type = AVMEDIA_TYPE_VIDEO;
00174     vst->codec->codec_id   = CODEC_ID_WMV2;
00175     vst->codec->codec_tag  = MKBETAG('W', 'M', 'V', '2');
00176     vst->codec->width      = avio_rl32(pb);
00177     vst->codec->height     = avio_rl32(pb);
00178 
00179     vst->duration          = avio_rl32(pb);
00180 
00181     xmv->video.stream_index = vst->index;
00182 
00183     /* Audio tracks */
00184 
00185     xmv->audio_track_count = avio_rl16(pb);
00186 
00187     avio_skip(pb, 2); /* Unknown (padding?) */
00188 
00189     xmv->audio_tracks = av_malloc(xmv->audio_track_count * sizeof(XMVAudioTrack));
00190     if (!xmv->audio_tracks)
00191         return AVERROR(ENOMEM);
00192 
00193     xmv->audio = av_malloc(xmv->audio_track_count * sizeof(XMVAudioPacket));
00194     if (!xmv->audio) {
00195         ret = AVERROR(ENOMEM);
00196         goto fail;
00197     }
00198 
00199     for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
00200         XMVAudioTrack  *track  = &xmv->audio_tracks[audio_track];
00201         XMVAudioPacket *packet = &xmv->audio       [audio_track];
00202         AVStream *ast = NULL;
00203 
00204         track->compression     = avio_rl16(pb);
00205         track->channels        = avio_rl16(pb);
00206         track->sample_rate     = avio_rl32(pb);
00207         track->bits_per_sample = avio_rl16(pb);
00208         track->flags           = avio_rl16(pb);
00209 
00210         track->bit_rate      = track->bits_per_sample *
00211                                track->sample_rate *
00212                                track->channels;
00213         track->block_align   = XMV_BLOCK_ALIGN_SIZE * track->channels;
00214         track->block_samples = 64;
00215         track->codec_id      = ff_wav_codec_get_id(track->compression,
00216                                                    track->bits_per_sample);
00217 
00218         packet->track        = track;
00219         packet->stream_index = -1;
00220 
00221         packet->frame_size  = 0;
00222         packet->block_count = 0;
00223 
00224         /* TODO: ADPCM'd 5.1 sound is encoded in three separate streams.
00225          *       Those need to be interleaved to a proper 5.1 stream. */
00226         if (track->flags & XMV_AUDIO_ADPCM51)
00227             av_log(s, AV_LOG_WARNING, "Unsupported 5.1 ADPCM audio stream "
00228                                       "(0x%04X)\n", track->flags);
00229 
00230         if (!track->channels || !track->sample_rate ||
00231              track->channels >= UINT16_MAX / XMV_BLOCK_ALIGN_SIZE) {
00232             av_log(s, AV_LOG_ERROR, "Invalid parameters for audio track %d.\n",
00233                    audio_track);
00234             ret = AVERROR_INVALIDDATA;
00235             goto fail;
00236         }
00237 
00238         ast = avformat_new_stream(s, NULL);
00239         if (!ast) {
00240             ret = AVERROR(ENOMEM);
00241             goto fail;
00242         }
00243 
00244         ast->codec->codec_type            = AVMEDIA_TYPE_AUDIO;
00245         ast->codec->codec_id              = track->codec_id;
00246         ast->codec->codec_tag             = track->compression;
00247         ast->codec->channels              = track->channels;
00248         ast->codec->sample_rate           = track->sample_rate;
00249         ast->codec->bits_per_coded_sample = track->bits_per_sample;
00250         ast->codec->bit_rate              = track->bit_rate;
00251         ast->codec->block_align           = 36 * track->channels;
00252 
00253         avpriv_set_pts_info(ast, 32, track->block_samples, track->sample_rate);
00254 
00255         packet->stream_index = ast->index;
00256 
00257         ast->duration = vst->duration;
00258     }
00259 
00260 
00263     xmv->next_packet_offset = avio_tell(pb);
00264     xmv->next_packet_size   = this_packet_size - xmv->next_packet_offset;
00265     xmv->stream_count       = xmv->audio_track_count + 1;
00266 
00267     return 0;
00268 
00269 fail:
00270     xmv_read_close(s);
00271     return ret;
00272 }
00273 
00274 static void xmv_read_extradata(uint8_t *extradata, AVIOContext *pb)
00275 {
00276     /* Read the XMV extradata */
00277 
00278     uint32_t data = avio_rl32(pb);
00279 
00280     int mspel_bit        = !!(data & 0x01);
00281     int loop_filter      = !!(data & 0x02);
00282     int abt_flag         = !!(data & 0x04);
00283     int j_type_bit       = !!(data & 0x08);
00284     int top_left_mv_flag = !!(data & 0x10);
00285     int per_mb_rl_bit    = !!(data & 0x20);
00286     int slice_count      = (data >> 6) & 7;
00287 
00288     /* Write it back as standard WMV2 extradata */
00289 
00290     data = 0;
00291 
00292     data |= mspel_bit        << 15;
00293     data |= loop_filter      << 14;
00294     data |= abt_flag         << 13;
00295     data |= j_type_bit       << 12;
00296     data |= top_left_mv_flag << 11;
00297     data |= per_mb_rl_bit    << 10;
00298     data |= slice_count      <<  7;
00299 
00300     AV_WB32(extradata, data);
00301 }
00302 
00303 static int xmv_process_packet_header(AVFormatContext *s)
00304 {
00305     XMVDemuxContext *xmv = s->priv_data;
00306     AVIOContext     *pb  = s->pb;
00307 
00308     uint8_t  data[8];
00309     uint16_t audio_track;
00310     uint32_t data_offset;
00311 
00312     /* Next packet size */
00313     xmv->next_packet_size = avio_rl32(pb);
00314 
00315     /* Packet video header */
00316 
00317     if (avio_read(pb, data, 8) != 8)
00318         return AVERROR(EIO);
00319 
00320     xmv->video.data_size     = AV_RL32(data) & 0x007FFFFF;
00321 
00322     xmv->video.current_frame = 0;
00323     xmv->video.frame_count   = (AV_RL32(data) >> 23) & 0xFF;
00324 
00325     xmv->video.has_extradata = (data[3] & 0x80) != 0;
00326 
00327     /* Adding the audio data sizes and the video data size keeps you 4 bytes
00328      * short for every audio track. But as playing around with XMV files with
00329      * ADPCM audio showed, taking the extra 4 bytes from the audio data gives
00330      * you either completely distorted audio or click (when skipping the
00331      * remaining 68 bytes of the ADPCM block). Substracting 4 bytes for every
00332      * audio track from the video data works at least for the audio. Probably
00333      * some alignment thing?
00334      * The video data has (always?) lots of padding, so it should work out...
00335      */
00336     xmv->video.data_size -= xmv->audio_track_count * 4;
00337 
00338     xmv->current_stream = 0;
00339     if (!xmv->video.frame_count) {
00340         xmv->video.frame_count = 1;
00341         xmv->current_stream    = 1;
00342     }
00343 
00344     /* Packet audio header */
00345 
00346     for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
00347         XMVAudioPacket *packet = &xmv->audio[audio_track];
00348 
00349         if (avio_read(pb, data, 4) != 4)
00350             return AVERROR(EIO);
00351 
00352         packet->data_size = AV_RL32(data) & 0x007FFFFF;
00353         if ((packet->data_size == 0) && (audio_track != 0))
00354             /* This happens when I create an XMV with several identical audio
00355              * streams. From the size calculations, duplicating the previous
00356              * stream's size works out, but the track data itself is silent.
00357              * Maybe this should also redirect the offset to the previous track?
00358              */
00359             packet->data_size = xmv->audio[audio_track - 1].data_size;
00360 
00362         packet->frame_size  = packet->data_size  / xmv->video.frame_count;
00363         packet->frame_size -= packet->frame_size % packet->track->block_align;
00364     }
00365 
00366     /* Packet data offsets */
00367 
00368     data_offset = avio_tell(pb);
00369 
00370     xmv->video.data_offset = data_offset;
00371     data_offset += xmv->video.data_size;
00372 
00373     for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
00374         xmv->audio[audio_track].data_offset = data_offset;
00375         data_offset += xmv->audio[audio_track].data_size;
00376     }
00377 
00378     /* Video frames header */
00379 
00380     /* Read new video extra data */
00381     if (xmv->video.data_size > 0) {
00382         if (xmv->video.has_extradata) {
00383             xmv_read_extradata(xmv->video.extradata, pb);
00384 
00385             xmv->video.data_size   -= 4;
00386             xmv->video.data_offset += 4;
00387 
00388             if (xmv->video.stream_index >= 0) {
00389                 AVStream *vst = s->streams[xmv->video.stream_index];
00390 
00391                 assert(xmv->video.stream_index < s->nb_streams);
00392 
00393                 if (vst->codec->extradata_size < 4) {
00394                     av_free(vst->codec->extradata);
00395 
00396                     vst->codec->extradata =
00397                         av_malloc(4 + FF_INPUT_BUFFER_PADDING_SIZE);
00398                     vst->codec->extradata_size = 4;
00399                 }
00400 
00401                 memcpy(vst->codec->extradata, xmv->video.extradata, 4);
00402             }
00403         }
00404     }
00405 
00406     return 0;
00407 }
00408 
00409 static int xmv_fetch_new_packet(AVFormatContext *s)
00410 {
00411     XMVDemuxContext *xmv = s->priv_data;
00412     AVIOContext     *pb  = s->pb;
00413     int result;
00414 
00415     /* Seek to it */
00416     xmv->this_packet_offset = xmv->next_packet_offset;
00417     if (avio_seek(pb, xmv->this_packet_offset, SEEK_SET) != xmv->this_packet_offset)
00418         return AVERROR(EIO);
00419 
00420     /* Update the size */
00421     xmv->this_packet_size = xmv->next_packet_size;
00422     if (xmv->this_packet_size < (12 + xmv->audio_track_count * 4))
00423         return AVERROR(EIO);
00424 
00425     /* Process the header */
00426     result = xmv_process_packet_header(s);
00427     if (result)
00428         return result;
00429 
00430     /* Update the offset */
00431     xmv->next_packet_offset = xmv->this_packet_offset + xmv->this_packet_size;
00432 
00433     return 0;
00434 }
00435 
00436 static int xmv_fetch_audio_packet(AVFormatContext *s,
00437                                   AVPacket *pkt, uint32_t stream)
00438 {
00439     XMVDemuxContext *xmv   = s->priv_data;
00440     AVIOContext     *pb    = s->pb;
00441     XMVAudioPacket  *audio = &xmv->audio[stream];
00442 
00443     uint32_t data_size;
00444     uint32_t block_count;
00445     int result;
00446 
00447     /* Seek to it */
00448     if (avio_seek(pb, audio->data_offset, SEEK_SET) != audio->data_offset)
00449         return AVERROR(EIO);
00450 
00451     if ((xmv->video.current_frame + 1) < xmv->video.frame_count)
00452         /* Not the last frame, get at most frame_size bytes. */
00453         data_size = FFMIN(audio->frame_size, audio->data_size);
00454     else
00455         /* Last frame, get the rest. */
00456         data_size = audio->data_size;
00457 
00458     /* Read the packet */
00459     result = av_get_packet(pb, pkt, data_size);
00460     if (result <= 0)
00461         return result;
00462 
00463     pkt->stream_index = audio->stream_index;
00464 
00465     /* Calculate the PTS */
00466 
00467     block_count = data_size / audio->track->block_align;
00468 
00469     pkt->duration = block_count;
00470     pkt->pts      = audio->block_count;
00471     pkt->dts      = AV_NOPTS_VALUE;
00472 
00473     audio->block_count += block_count;
00474 
00475     /* Advance offset */
00476     audio->data_size   -= data_size;
00477     audio->data_offset += data_size;
00478 
00479     return 0;
00480 }
00481 
00482 static int xmv_fetch_video_packet(AVFormatContext *s,
00483                                   AVPacket *pkt)
00484 {
00485     XMVDemuxContext *xmv   = s->priv_data;
00486     AVIOContext     *pb    = s->pb;
00487     XMVVideoPacket  *video = &xmv->video;
00488 
00489     int result;
00490     uint32_t frame_header;
00491     uint32_t frame_size, frame_timestamp;
00492     uint32_t i;
00493 
00494     /* Seek to it */
00495     if (avio_seek(pb, video->data_offset, SEEK_SET) != video->data_offset)
00496         return AVERROR(EIO);
00497 
00498     /* Read the frame header */
00499     frame_header = avio_rl32(pb);
00500 
00501     frame_size      = (frame_header & 0x1FFFF) * 4 + 4;
00502     frame_timestamp = (frame_header >> 17);
00503 
00504     if ((frame_size + 4) > video->data_size)
00505         return AVERROR(EIO);
00506 
00507     /* Create the packet */
00508     result = av_new_packet(pkt, frame_size);
00509     if (result)
00510         return result;
00511 
00512     /* Contrary to normal WMV2 video, the bit stream in XMV's
00513      * WMV2 is little-endian.
00514      * TODO: This manual swap is of course suboptimal.
00515      */
00516     for (i = 0; i < frame_size; i += 4)
00517         AV_WB32(pkt->data + i, avio_rl32(pb));
00518 
00519     pkt->stream_index = video->stream_index;
00520 
00521     /* Calculate the PTS */
00522 
00523     video->last_pts = frame_timestamp + video->pts;
00524 
00525     pkt->duration = 0;
00526     pkt->pts      = video->last_pts;
00527     pkt->dts      = AV_NOPTS_VALUE;
00528 
00529     video->pts += frame_timestamp;
00530 
00531     /* Keyframe? */
00532     pkt->flags = (pkt->data[0] & 0x80) ? 0 : AV_PKT_FLAG_KEY;
00533 
00534     /* Advance offset */
00535     video->data_size   -= frame_size + 4;
00536     video->data_offset += frame_size + 4;
00537 
00538     return 0;
00539 }
00540 
00541 static int xmv_read_packet(AVFormatContext *s,
00542                            AVPacket *pkt)
00543 {
00544     XMVDemuxContext *xmv = s->priv_data;
00545     int result;
00546 
00547     if (xmv->video.current_frame == xmv->video.frame_count) {
00548         /* No frames left in this packet, so we fetch a new one */
00549 
00550         result = xmv_fetch_new_packet(s);
00551         if (result)
00552             return result;
00553     }
00554 
00555     if (xmv->current_stream == 0) {
00556         /* Fetch a video frame */
00557 
00558         result = xmv_fetch_video_packet(s, pkt);
00559         if (result)
00560             return result;
00561 
00562     } else {
00563         /* Fetch an audio frame */
00564 
00565         result = xmv_fetch_audio_packet(s, pkt, xmv->current_stream - 1);
00566         if (result)
00567             return result;
00568     }
00569 
00570     /* Increase our counters */
00571     if (++xmv->current_stream >= xmv->stream_count) {
00572         xmv->current_stream       = 0;
00573         xmv->video.current_frame += 1;
00574     }
00575 
00576     return 0;
00577 }
00578 
00579 AVInputFormat ff_xmv_demuxer = {
00580     .name           = "xmv",
00581     .long_name      = NULL_IF_CONFIG_SMALL("Microsoft XMV"),
00582     .priv_data_size = sizeof(XMVDemuxContext),
00583     .read_probe     = xmv_probe,
00584     .read_header    = xmv_read_header,
00585     .read_packet    = xmv_read_packet,
00586     .read_close     = xmv_read_close,
00587 };