Libav 0.7.1
libavformat/id3v2.c
Go to the documentation of this file.
00001 /*
00002  * ID3v2 header parser
00003  * Copyright (c) 2003 Fabrice Bellard
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "id3v2.h"
00023 #include "id3v1.h"
00024 #include "libavutil/avstring.h"
00025 #include "libavutil/intreadwrite.h"
00026 #include "libavutil/dict.h"
00027 #include "avio_internal.h"
00028 
/**
 * Check whether a buffer starts with a plausible ID3v2 header.
 *
 * @param buf   buffer with at least 10 readable bytes; bytes 0-4 and 6-9
 *              are inspected, byte 5 (the flags byte) is not
 * @param magic identifier whose first three bytes the header must start with
 * @return 1 if the header looks valid, 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* The three identifier bytes must match exactly. */
    for (i = 0; i < 3; i++)
        if (buf[i] != magic[i])
            return 0;

    /* Neither version byte may be 0xff. */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* The four size bytes must all have their top bit clear. */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
00041 
00042 int ff_id3v2_tag_len(const uint8_t * buf)
00043 {
00044     int len = ((buf[6] & 0x7f) << 21) +
00045               ((buf[7] & 0x7f) << 14) +
00046               ((buf[8] & 0x7f) << 7) +
00047                (buf[9] & 0x7f) +
00048               ID3v2_HEADER_SIZE;
00049     if (buf[5] & 0x10)
00050         len += ID3v2_HEADER_SIZE;
00051     return len;
00052 }
00053 
00054 static unsigned int get_size(AVIOContext *s, int len)
00055 {
00056     int v = 0;
00057     while (len--)
00058         v = (v << 7) + (avio_r8(s) & 0x7F);
00059     return v;
00060 }
00061 
00062 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
00063 {
00064     char *q, dst[512];
00065     const char *val = NULL;
00066     int len, dstlen = sizeof(dst) - 1;
00067     unsigned genre;
00068     unsigned int (*get)(AVIOContext*) = avio_rb16;
00069 
00070     dst[0] = 0;
00071     if (taglen < 1)
00072         return;
00073 
00074     taglen--; /* account for encoding type byte */
00075 
00076     switch (avio_r8(pb)) { /* encoding type */
00077 
00078     case ID3v2_ENCODING_ISO8859:
00079         q = dst;
00080         while (taglen-- && q - dst < dstlen - 7) {
00081             uint8_t tmp;
00082             PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;)
00083         }
00084         *q = 0;
00085         break;
00086 
00087     case ID3v2_ENCODING_UTF16BOM:
00088         taglen -= 2;
00089         switch (avio_rb16(pb)) {
00090         case 0xfffe:
00091             get = avio_rl16;
00092         case 0xfeff:
00093             break;
00094         default:
00095             av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
00096             return;
00097         }
00098         // fall-through
00099 
00100     case ID3v2_ENCODING_UTF16BE:
00101         q = dst;
00102         while (taglen > 1 && q - dst < dstlen - 7) {
00103             uint32_t ch;
00104             uint8_t tmp;
00105 
00106             GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;)
00107             PUT_UTF8(ch, tmp, *q++ = tmp;)
00108         }
00109         *q = 0;
00110         break;
00111 
00112     case ID3v2_ENCODING_UTF8:
00113         len = FFMIN(taglen, dstlen);
00114         avio_read(pb, dst, len);
00115         dst[len] = 0;
00116         break;
00117     default:
00118         av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key);
00119     }
00120 
00121     if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
00122         && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
00123         && genre <= ID3v1_GENRE_MAX)
00124         val = ff_id3v1_genre_str[genre];
00125     else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
00126         /* dst now contains two 0-terminated strings */
00127         dst[dstlen] = 0;
00128         len = strlen(dst);
00129         key = dst;
00130         val = dst + FFMIN(len + 1, dstlen);
00131     }
00132     else if (*dst)
00133         val = dst;
00134 
00135     if (val)
00136         av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE);
00137 }
00138 
/**
 * Tell whether a string consists solely of ASCII decimal digits.
 *
 * @param str NUL-terminated string to check
 * @return 1 if every character is in '0'..'9' (an empty string also
 *         qualifies), 0 otherwise
 */
static int is_number(const char *str)
{
    const char *p;

    for (p = str; *p; p++)
        if (*p < '0' || *p > '9')
            return 0;

    return 1;
}
00144 
00145 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
00146 {
00147     AVDictionaryEntry *t;
00148     if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
00149         strlen(t->value) == 4 && is_number(t->value))
00150         return t;
00151     return NULL;
00152 }
00153 
00154 static void merge_date(AVDictionary **m)
00155 {
00156     AVDictionaryEntry *t;
00157     char date[17] = {0};      // YYYY-MM-DD hh:mm
00158 
00159     if (!(t = get_date_tag(*m, "TYER")) &&
00160         !(t = get_date_tag(*m, "TYE")))
00161         return;
00162     av_strlcpy(date, t->value, 5);
00163     av_dict_set(m, "TYER", NULL, 0);
00164     av_dict_set(m, "TYE",  NULL, 0);
00165 
00166     if (!(t = get_date_tag(*m, "TDAT")) &&
00167         !(t = get_date_tag(*m, "TDA")))
00168         goto finish;
00169     snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
00170     av_dict_set(m, "TDAT", NULL, 0);
00171     av_dict_set(m, "TDA",  NULL, 0);
00172 
00173     if (!(t = get_date_tag(*m, "TIME")) &&
00174         !(t = get_date_tag(*m, "TIM")))
00175         goto finish;
00176     snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
00177     av_dict_set(m, "TIME", NULL, 0);
00178     av_dict_set(m, "TIM",  NULL, 0);
00179 
00180 finish:
00181     if (date[0])
00182         av_dict_set(m, "date", date, 0);
00183 }
00184 
00185 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
00186 {
00187     int isv34, tlen, unsync;
00188     char tag[5];
00189     int64_t next, end = avio_tell(s->pb) + len;
00190     int taghdrlen;
00191     const char *reason = NULL;
00192     AVIOContext pb;
00193     unsigned char *buffer = NULL;
00194     int buffer_size = 0;
00195 
00196     switch (version) {
00197     case 2:
00198         if (flags & 0x40) {
00199             reason = "compression";
00200             goto error;
00201         }
00202         isv34 = 0;
00203         taghdrlen = 6;
00204         break;
00205 
00206     case 3:
00207     case 4:
00208         isv34 = 1;
00209         taghdrlen = 10;
00210         break;
00211 
00212     default:
00213         reason = "version";
00214         goto error;
00215     }
00216 
00217     unsync = flags & 0x80;
00218 
00219     if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */
00220         int extlen = get_size(s->pb, 4);
00221         if (version == 4)
00222             extlen -= 4;     // in v2.4 the length includes the length field we just read
00223 
00224         if (extlen < 0) {
00225             reason = "invalid extended header length";
00226             goto error;
00227         }
00228         avio_skip(s->pb, extlen);
00229     }
00230 
00231     while (len >= taghdrlen) {
00232         unsigned int tflags = 0;
00233         int tunsync = 0;
00234 
00235         if (isv34) {
00236             avio_read(s->pb, tag, 4);
00237             tag[4] = 0;
00238             if(version==3){
00239                 tlen = avio_rb32(s->pb);
00240             }else
00241                 tlen = get_size(s->pb, 4);
00242             tflags = avio_rb16(s->pb);
00243             tunsync = tflags & ID3v2_FLAG_UNSYNCH;
00244         } else {
00245             avio_read(s->pb, tag, 3);
00246             tag[3] = 0;
00247             tlen = avio_rb24(s->pb);
00248         }
00249         if (tlen <= 0 || tlen > len - taghdrlen) {
00250             av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
00251             break;
00252         }
00253         len -= taghdrlen + tlen;
00254         next = avio_tell(s->pb) + tlen;
00255 
00256         if (tflags & ID3v2_FLAG_DATALEN) {
00257             avio_rb32(s->pb);
00258             tlen -= 4;
00259         }
00260 
00261         if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
00262             av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
00263             avio_skip(s->pb, tlen);
00264         } else if (tag[0] == 'T') {
00265             if (unsync || tunsync) {
00266                 int i, j;
00267                 av_fast_malloc(&buffer, &buffer_size, tlen);
00268                 if (!buffer) {
00269                     av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
00270                     goto seek;
00271                 }
00272                 for (i = 0, j = 0; i < tlen; i++, j++) {
00273                     buffer[j] = avio_r8(s->pb);
00274                     if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
00275                         /* Unsynchronised byte, skip it */
00276                         j--;
00277                     }
00278                 }
00279                 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
00280                 read_ttag(s, &pb, j, tag);
00281             } else {
00282                 read_ttag(s, s->pb, tlen, tag);
00283             }
00284         }
00285         else if (!tag[0]) {
00286             if (tag[1])
00287                 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
00288             avio_skip(s->pb, tlen);
00289             break;
00290         }
00291         /* Skip to end of tag */
00292 seek:
00293         avio_seek(s->pb, next, SEEK_SET);
00294     }
00295 
00296     if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
00297         end += 10;
00298 
00299   error:
00300     if (reason)
00301         av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
00302     avio_seek(s->pb, end, SEEK_SET);
00303     av_free(buffer);
00304     return;
00305 }
00306 
00307 void ff_id3v2_read(AVFormatContext *s, const char *magic)
00308 {
00309     int len, ret;
00310     uint8_t buf[ID3v2_HEADER_SIZE];
00311     int     found_header;
00312     int64_t off;
00313 
00314     do {
00315         /* save the current offset in case there's nothing to read/skip */
00316         off = avio_tell(s->pb);
00317         ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
00318         if (ret != ID3v2_HEADER_SIZE)
00319             break;
00320             found_header = ff_id3v2_match(buf, magic);
00321             if (found_header) {
00322             /* parse ID3v2 header */
00323             len = ((buf[6] & 0x7f) << 21) |
00324                   ((buf[7] & 0x7f) << 14) |
00325                   ((buf[8] & 0x7f) << 7) |
00326                    (buf[9] & 0x7f);
00327             ff_id3v2_parse(s, len, buf[3], buf[5]);
00328         } else {
00329             avio_seek(s->pb, off, SEEK_SET);
00330         }
00331     } while (found_header);
00332     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
00333     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv);
00334     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
00335     merge_date(&s->metadata);
00336 }
00337 
00338 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
00339     { "TALB", "album"},
00340     { "TCOM", "composer"},
00341     { "TCON", "genre"},
00342     { "TCOP", "copyright"},
00343     { "TENC", "encoded_by"},
00344     { "TIT2", "title"},
00345     { "TLAN", "language"},
00346     { "TPE1", "artist"},
00347     { "TPE2", "album_artist"},
00348     { "TPE3", "performer"},
00349     { "TPOS", "disc"},
00350     { "TPUB", "publisher"},
00351     { "TRCK", "track"},
00352     { "TSSE", "encoder"},
00353     { 0 }
00354 };
00355 
00356 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
00357     { "TDRL", "date"},
00358     { "TDRC", "date"},
00359     { "TDEN", "creation_time"},
00360     { "TSOA", "album-sort"},
00361     { "TSOP", "artist-sort"},
00362     { "TSOT", "title-sort"},
00363     { 0 }
00364 };
00365 
00366 const AVMetadataConv ff_id3v2_2_metadata_conv[] = {
00367     { "TAL",  "album"},
00368     { "TCO",  "genre"},
00369     { "TT2",  "title"},
00370     { "TEN",  "encoded_by"},
00371     { "TP1",  "artist"},
00372     { "TP2",  "album_artist"},
00373     { "TP3",  "performer"},
00374     { "TRK",  "track"},
00375     { 0 }
00376 };
00377 
00378 
/*
 * List of four-character text frame identifiers.
 *
 * Each entry fills its char[4] slot completely, so entries are NOT
 * NUL-terminated strings; compare them with a length of 4.  The list ends
 * with an all-zero entry.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON",
    "TCOP", "TDLY", "TENC", "TEXT",
    "TFLT", "TIT1", "TIT2", "TIT3",
    "TKEY", "TLAN", "TLEN", "TMED",
    "TOAL", "TOFN", "TOLY", "TOPE",
    "TOWN", "TPE1", "TPE2", "TPE3",
    "TPE4", "TPOS", "TPUB", "TRCK",
    "TRSN", "TRSO", "TSRC", "TSSE",
    "",     /* terminator: all four bytes are zero */
};
00386 
/*
 * List of four-character text frame identifiers.
 * NOTE(review): the name suggests these are specific to ID3v2.4 --
 * confirm against the header that declares the table.
 *
 * Entries fill their char[4] slot completely and are NOT NUL-terminated;
 * the list ends with an all-zero entry.
 */
const char ff_id3v2_4_tags[][4] = {
    "TDEN", "TDOR", "TDRC", "TDRL",
    "TDTG", "TIPL", "TMCL", "TMOO",
    "TPRO", "TSOA", "TSOP", "TSOT",
    "TSST",
    "",     /* terminator: all four bytes are zero */
};
00392 
/*
 * List of four-character text frame identifiers.
 * NOTE(review): the name suggests these are specific to ID3v2.3 --
 * confirm against the header that declares the table.
 *
 * Entries fill their char[4] slot completely and are NOT NUL-terminated;
 * the list ends with an all-zero entry.
 */
const char ff_id3v2_3_tags[][4] = {
    "TDAT", "TIME", "TORY",
    "TRDA", "TSIZ", "TYER",
    "",     /* terminator: all four bytes are zero */
};