ref: d8185e7273233384dcf32da043c28446082df1d2
author: Sigrid Solveig Haflínudóttir <ftrvxmtrx@gmail.com>
date: Wed Oct 9 15:21:55 EDT 2019
squash
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,4 @@
+*.[1-9]
+*.o
+*.out
+result
--- /dev/null
+++ b/LICENSE
@@ -1,0 +1,18 @@
+Copyright © 2013-2019 Sigrid Solveig Haflínudóttir
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null
+++ b/README.md
@@ -1,0 +1,41 @@
+libtags
+=======
+
+A cross-platform library for reading tags, designed for highly constrained environments.
+
+Comparison to id3lib and taglib:
+
+| | libtags | id3lib | taglib |
+|:---------------|:----------------|:-----------------|:-----------------|
+| ID3v2.4 | yes | no | yes |
+| Ogg/Vorbis | yes | no | yes |
+| FLAC | yes | no | yes |
+| m4a | yes | no | yes |
+| replay gain | yes | no | ??? |
+| size | tiny | bloated | more bloated |
+| license | MIT | LGPL | LGPL/MPL |
+| written in | C | C++ | C++ |
+| memory | no allocations | allocates memory | allocates memory |
+| thread safe | yes | ??? | ??? |
+| speed | ultra-fast | slow | fast |
+| tag writing | no, not a goal | yes | yes |
+| Plan 9 support | yes, native | no | no |
+
+CPU time (784 files: mp3, ogg, flac):
+
+| | libtags | taglib |
+|:---------------|:-----------------|:-----------------|
+| files cached | real 0m0.027s | real 0m0.155s |
+| | user 0m0.014s | user 0m0.102s |
+| | sys 0m0.012s | sys 0m0.053s |
+| | | |
+| cache dropped | real 0m1.158s | real 0m1.628s |
+| | user 0m0.024s | user 0m0.211s |
+| | sys 0m0.132s | sys 0m0.187s |
+
+## Usage
+
+Just compile it to an archive (`.a`) and link it to your program. Use it in your code
+by including `tags.h`; that header is the API, and the documentation lives in it.
+
+See `examples/readtags.c`.
--- /dev/null
+++ b/default.nix
@@ -1,0 +1,22 @@
+{ stdenv, mk, pkgconfig, fetchgitLocal }: # NOTE(review): fetchgitLocal is only mentioned in a comment below
+
+stdenv.mkDerivation rec {
+ name = "libtags";
+ src = ./.;#fetchgitLocal ./.;
+
+ buildInputs = [ mk ]; # mk runs the mkfile.nix builds in installPhase
+ propagatedBuildInputs = [ pkgconfig ];
+ enableParallelBuilding = true;
+ # Install the library first, then build the example against the installed headers and archive.
+ installPhase = ''
+ cd src && mk -f mkfile.nix install && cd ..
+ cd examples && mk -f mkfile.nix install INCLUDES=-I$out/include LIBS="-L$out/lib -ltags"
+ '';
+
+ meta = {
+ description = "A cross-platform library for reading tags, designed for highly constrained environments";
+ maintainers = with stdenv.lib.maintainers; [ ftrvxmtrx ];
+ platforms = stdenv.lib.platforms.unix;
+ license = stdenv.lib.licenses.mit;
+ };
+}
--- /dev/null
+++ b/examples/mkfile
@@ -1,0 +1,13 @@
+</$objtype/mkfile
+
+TARG=readtags
+
+OFILES=\
+ readtags.$O\
+
+BIN=/$objtype/bin
+
+HFILES=\
+ /sys/include/tags.h\
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/examples/mkfile.nix
@@ -1,0 +1,11 @@
+TARG=readtags
+
+<$MKRULES/defaults
+
+OFILES=\
+ readtags.$O\
+
+HFILES=\
+ tags.h\
+
+<$MKRULES/mkone
--- /dev/null
+++ b/examples/readtags.c
@@ -1,0 +1,108 @@
+#ifdef __unix__
+#define _DEFAULT_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#define print printf
+#define seek lseek
+#define nil NULL
+#define OREAD O_RDONLY
+#define USED(x) (void)x
+#else
+#include <u.h>
+#include <libc.h>
+#endif
+#include <tags.h>
+
+typedef struct Aux Aux;
+
+struct Aux
+{
+ int fd;
+};
+
+static const char *t2s[] =
+{
+ [Tartist] = "artist",
+ [Talbum] = "album",
+ [Ttitle] = "title",
+ [Tdate] = "date",
+ [Ttrack] = "track",
+ [Talbumgain] = "albumgain",
+ [Talbumpeak] = "albumpeak",
+ [Ttrackgain] = "trackgain",
+ [Ttrackpeak] = "trackpeak",
+ [Tgenre] = "genre",
+ [Timage] = "image",
+};
+/* Tag callback handed to libtags: prints one line for every tag it reports. */
+static void
+cb(Tagctx *ctx, int t, const char *v, int offset, int size, Tagread f)
+{
+	USED(ctx); USED(offset); USED(size); USED(f);
+	if(t != Timage) /* text tag: label and value */
+		print("%-12s %s\n", t2s[t], v);
+	else /* cover art: v is printed as-is, followed by the image's offset and size */
+		print("%-12s %s %d %d\n", t2s[t], v, offset, size);
+}
+/* Tagctx read hook: reads up to cnt bytes from the descriptor stored in the Aux hanging off ctx. */
+static int
+ctxread(Tagctx *ctx, void *buf, int cnt)
+{
+	int fd = ((Aux*)ctx->aux)->fd;
+	return read(fd, buf, cnt);
+}
+/* Tagctx seek hook: repositions the descriptor stored in the Aux hanging off ctx and returns the result. */
+static int
+ctxseek(Tagctx *ctx, int offset, int whence)
+{
+	int fd = ((Aux*)ctx->aux)->fd;
+	return seek(fd, offset, whence);
+}
+/* Example driver: prints the tags and stream properties of every file named on the command line. */
+int
+main(int argc, char **argv)
+{
+	int i;
+	char buf[256];
+	Aux aux;
+	Tagctx ctx = {
+		.read = ctxread,
+		.seek = ctxseek,
+		.tag = cb,
+		.buf = buf,
+		.bufsz = sizeof(buf),
+		.aux = &aux,
+	};
+
+	if(argc < 2){
+		print("usage: readtags FILE...\n");
+		return -1;
+	}
+	for(i = 1; i < argc; i++){
+		print("*** %s\n", argv[i]);
+		aux.fd = open(argv[i], OREAD);
+		if(aux.fd < 0){
+			print("failed to open\n");
+			print("\n");
+			continue;
+		}
+		ctx.filename = argv[i];
+		if(tagsget(&ctx) != 0)
+			print("no tags or failed to read tags\n");
+		else{
+			if(ctx.duration > 0)
+				print("%-12s %d ms\n", "duration", ctx.duration);
+			if(ctx.samplerate > 0)
+				print("%-12s %d\n", "samplerate", ctx.samplerate);
+			if(ctx.channels > 0)
+				print("%-12s %d\n", "channels", ctx.channels);
+			if(ctx.bitrate > 0)
+				print("%-12s %d\n", "bitrate", ctx.bitrate);
+		}
+		close(aux.fd);
+		print("\n");
+	}
+	return 0;
+}
--- /dev/null
+++ b/src/8859.c
@@ -1,0 +1,29 @@
+/* http://en.wikipedia.org/wiki/ISO/IEC_8859-1 */
+#include "tagspriv.h"
+/* Converts at most sz bytes of ISO-8859-1 text at s into NUL-terminated UTF-8 at o (capacity osz).
+ * Stops early at a NUL in s or when the next character would not fit.
+ * Returns the number of source bytes consumed. */
+int
+iso88591toutf8(uchar *o, int osz, const uchar *s, int sz)
+{
+	int i;
+
+	if(osz < 1) /* no room even for the terminator */
+		return 0;
+	for(i = 0; i < sz && osz > 1 && s[i] != 0; i++){
+		if(s[i] < 0x80){ /* ASCII maps to itself */
+			*o++ = s[i];
+			osz--;
+			continue;
+		}
+		if(osz < 3) /* need two bytes plus the terminator */
+			break;
+		/* U+0080..U+00FF (including the 0x80-0x9f controls): 110000xx 10xxxxxx */
+		*o++ = 0xc0 | s[i]>>6;
+		*o++ = 0x80 | (s[i] & 0x3f);
+		osz -= 2;
+	}
+
+	*o = 0;
+	return i;
+}
--- /dev/null
+++ b/src/flac.c
@@ -1,0 +1,105 @@
+/* https://xiph.org/flac/format.html */
+#include "tagspriv.h"
+
+#define beu3(d) ((d)[0]<<16 | (d)[1]<<8 | (d)[2]<<0)
+#define beuint(d) (uint)((d)[0]<<24 | (d)[1]<<16 | (d)[2]<<8 | (d)[3]<<0)
+#define leuint(d) (uint)((d)[3]<<24 | (d)[2]<<16 | (d)[1]<<8 | (d)[0]<<0)
+/* Parses a FLAC file: fills in the stream properties and reports picture and Vorbis comment blocks. Returns 0 or -1. */
+int
+tagflac(Tagctx *ctx)
+{
+	uchar *d;
+	int sz, last;
+	uvlong g;
+
+	d = (uchar*)ctx->buf;
+	/* 8 bytes for marker, block type, length. 18 bytes for the stream info */
+	if(ctx->read(ctx, d, 8+18) != 8+18 || memcmp(d, "fLaC\x00", 5) != 0)
+		return -1;
+	sz = beu3(&d[5]); /* size of the stream info */
+	ctx->samplerate = beu3(&d[18]) >> 4;
+	ctx->channels = ((d[20]>>1) & 7) + 1;
+	/* 36-bit total sample count; the low 32 bits are widened unsigned so bit 31 is not sign-extended */
+	g = (uvlong)(d[21] & 0xf)<<32 | (uvlong)beuint(&d[22]);
+	if(ctx->samplerate > 0) /* a zero rate would divide by zero */
+		ctx->duration = g * 1000 / ctx->samplerate;
+	/* skip the rest of the stream info */
+	if(ctx->seek(ctx, sz-18, 1) != 8+sz)
+		return -1;
+
+	for(last = 0; !last;){
+		if(ctx->read(ctx, d, 4) != 4)
+			return -1;
+		sz = beu3(&d[1]);
+		last = (d[0] & 0x80) != 0;
+
+		if((d[0] & 0x7f) == 6){ /* 6 = picture */
+			int n, offset;
+			char *mime;
+			if(sz < 16 || ctx->read(ctx, d, 8) != 8) /* type, mime length */
+				return -1;
+			sz -= 8;
+			n = beuint(&d[4]);
+			mime = ctx->buf+20; /* keeps clear of the 20 bytes read into d below */
+			if(n < 0 || n >= sz || 20+n+1 > ctx->bufsz || ctx->read(ctx, mime, n) != n)
+				return -1;
+			mime[n] = 0;
+			sz -= n;
+			/* description length; skip the description, then read width, height, depth, colors, data length */
+			if(ctx->read(ctx, d, 4) != 4 || (n = beuint(d)) < 0 || n > sz-4-20 || ctx->seek(ctx, n, 1) < 0 || ctx->read(ctx, d, 20) != 20)
+				return -1;
+			sz -= 4+n+20;
+			offset = ctx->seek(ctx, 0, 1); /* the image data starts here */
+			n = beuint(&d[16]);
+			tagscallcb(ctx, Timage, mime, offset, n, nil);
+			/* step over the image data so the next block header is read from the right place */
+			if(sz > 0 && ctx->seek(ctx, sz, 1) < 0)
+				return -1;
+		}else if((d[0] & 0x7f) == 4){ /* 4 = vorbis comment */
+			int i, numtags, tagsz, vensz;
+			char *k, *v;
+			if(sz < 12 || ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= 4;
+			vensz = leuint(d);
+			if(vensz < 0 || vensz > sz-8)
+				return -1;
+			/* skip vendor, read the number of tags */
+			if(ctx->seek(ctx, vensz, 1) < 0 || ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= vensz+4;
+			numtags = leuint(d);
+			for(i = 0; i < numtags && sz > 4; i++){
+				if(ctx->read(ctx, d, 4) != 4)
+					return -1;
+				tagsz = leuint(d);
+				sz -= 4;
+				if(tagsz < 0 || tagsz > sz)
+					return -1;
+				sz -= tagsz;
+				/* if it is empty or doesn't fit, ignore it */
+				if(tagsz < 1 || tagsz+1 > ctx->bufsz){
+					if(ctx->seek(ctx, tagsz, 1) < 0)
+						return -1;
+					continue;
+				}
+				k = ctx->buf;
+				if(ctx->read(ctx, k, tagsz) != tagsz)
+					return -1;
+				/* some tags have a stupid '\r'; ignore */
+				if(k[tagsz-1] == '\r')
+					k[tagsz-1] = 0;
+				k[tagsz] = 0;
+				if((v = strchr(k, '=')) != nil){
+					*v++ = 0;
+					cbvorbiscomment(ctx, k, v);
+				}
+			}
+			if(sz > 0 && ctx->seek(ctx, sz, 1) < 0) /* skip whatever is left of the block */
+				return -1;
+		}else if(ctx->seek(ctx, sz, 1) <= 0)
+			return -1;
+	}
+
+	return 0;
+}
--- /dev/null
+++ b/src/id3genres.c
@@ -1,0 +1,42 @@
+#include "tagspriv.h"
+/* Genre names indexed by the numeric genre id; id3v1.c, id3v2.c and m4a.c look names up here. */
+const char *id3genres[Numgenre] =
+{
+ "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk",
+ "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies",
+ "Other", "Pop", "Rhythm and Blues", "Rap", "Reggae", "Rock",
+ "Techno", "Industrial", "Alternative", "Ska", "Death Metal",
+ "Pranks", "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
+ "Vocal", "Jazz & Funk", "Fusion", "Trance", "Classical",
+ "Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel",
+ "Noise", "Alternative Rock", "Bass", "Soul", "Punk rock", "Space",
+ "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic",
+ "Gothic", "Darkwave", "Techno-Industrial", "Electronic",
+ "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy",
+ "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk",
+ "Jungle", "Native American", "Cabaret", "New Wave", "Psychedelic",
+ "Rave", "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk",
+ "Acid Jazz", "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",
+ "Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion",
+ "Bebop", "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde",
+ "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock",
+ "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic",
+ "Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata",
+ "Symphony", "Booty Bass", "Primus", "Porn groove", "Satire", "Slow Jam",
+ "Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad",
+ "Rhythmic Soul", "Freestyle", "Duet", "Punk rock", "Drum Solo", "A capella",
+ "Euro-House", "Dance Hall", "Goa Trance", "Drum & Bass",
+ "Club-House", "Hardcore Techno", "Terror", "Indie", "BritPop",
+ "Afro-punk", "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal",
+ "Black Metal", "Crossover", "Contemporary Christian", "Christian Rock",
+ "Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop", "Synthpop",
+ "Abstract", "Art Rock", "Baroque", "Bhangra", "Big Beat",
+ "Breakbeat", "Chillout", "Downtempo", "Dub", "EBM", "Eclectic",
+ "Electro", "Electroclash", "Emo", "Experimental", "Garage",
+ "Global", "IDM", "Illbient", "Industro-Goth", "Jam Band",
+ "Krautrock", "Leftfield", "Lounge", "Math Rock", "New Romantic",
+ "Nu-Breakz", "Post-Punk", "Post-Rock", "Psytrance", "Shoegaze",
+ "Space Rock", "Trop Rock", "World Music", "Neoclassical",
+ "Audiobook", "Audio Theatre", "Neue Deutsche Welle", "Podcast",
+ "Indie Rock", "G-Funk", "Dubstep", "Garage Rock", "Psybient",
+};
--- /dev/null
+++ b/src/id3v1.c
@@ -1,0 +1,48 @@
+/*
+ * http://en.wikipedia.org/wiki/ID3
+ * Space-padded strings are mentioned there. This is wrong and is a lie.
+ */
+#include "tagspriv.h"
+
+enum
+{
+ Insz = 128,
+ Outsz = 61,
+};
+/* Reads the 128-byte ID3v1 trailer and reports every field that an earlier parser has not already found. */
+int
+tagid3v1(Tagctx *ctx)
+{
+	static const struct { int type, off; } txt[] = {
+		{Ttitle, 3}, {Tartist, 33}, {Talbum, 63},
+	};
+	uchar *in, *out;
+	int i;
+
+	if(ctx->bufsz < Insz+Outsz)
+		return -1;
+	in = (uchar*)ctx->buf;
+	out = in + Insz;
+
+	if(ctx->seek(ctx, -Insz, 2) < 0 || ctx->read(ctx, in, Insz) != Insz)
+		return -1;
+	if(memcmp(in, "TAG", 3) != 0)
+		return -1;
+	/* title, artist and album are 30-byte fields converted through the out half of the buffer */
+	for(i = 0; i < (int)(sizeof(txt)/sizeof(txt[0])); i++)
+		if((ctx->found & 1<<txt[i].type) == 0 && iso88591toutf8(out, Outsz, &in[txt[i].off], 30) > 0)
+			txtcb(ctx, txt[i].type, out);
+
+	in[93+4] = 0; /* terminate the 4-byte year in place */
+	if((ctx->found & 1<<Tdate) == 0 && in[93] != 0)
+		txtcb(ctx, Tdate, &in[93]);
+
+	if((ctx->found & 1<<Ttrack) == 0 && in[125] == 0 && in[126] > 0){
+		snprint((char*)out, Outsz, "%d", in[126]);
+		txtcb(ctx, Ttrack, out);
+	}
+
+	if((ctx->found & 1<<Tgenre) == 0 && in[127] < Numgenre)
+		txtcb(ctx, Tgenre, id3genres[in[127]]);
+	return 0;
+}
--- /dev/null
+++ b/src/id3v2.c
@@ -1,0 +1,450 @@
+/*
+ * Have fun reading the following:
+ *
+ * http://id3.org/id3v2.4.0-structure
+ * http://id3.org/id3v2.4.0-frames
+ * http://id3.org/d3v2.3.0
+ * http://id3.org/id3v2-00
+ * http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
+ * http://wiki.hydrogenaud.io/index.php?title=MP3#VBRI.2C_XING.2C_and_LAME_headers
+ * http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#VBRIHeader
+ */
+#include "tagspriv.h"
+
+#define synchsafe(d) (uint)(((d)[0]&127)<<21 | ((d)[1]&127)<<14 | ((d)[2]&127)<<7 | ((d)[3]&127)<<0)
+#define beuint(d) (uint)((d)[0]<<24 | (d)[1]<<16 | (d)[2]<<8 | (d)[3]<<0)
+/* Maps the ID3v2 text frame id k to a tag type and reports v through txtcb(). Returns 0 for frames it does not handle, 1 otherwise. */
+static int
+v2cb(Tagctx *ctx, char *k, char *v)
+{
+ k++; /* skip the first character of the id; the caller only passes frames starting with 'T' */
+ if(strcmp(k, "AL") == 0 || strcmp(k, "ALB") == 0)
+ txtcb(ctx, Talbum, v);
+ else if(strcmp(k, "PE1") == 0 || strcmp(k, "PE2") == 0 || strcmp(k, "P1") == 0 || strcmp(k, "P2") == 0)
+ txtcb(ctx, Tartist, v);
+ else if(strcmp(k, "IT2") == 0 || strcmp(k, "T2") == 0)
+ txtcb(ctx, Ttitle, v);
+ else if(strcmp(k, "YE") == 0 || strcmp(k, "YER") == 0 || strcmp(k, "DRC") == 0)
+ txtcb(ctx, Tdate, v);
+ else if(strcmp(k, "RK") == 0 || strcmp(k, "RCK") == 0)
+ txtcb(ctx, Ttrack, v);
+ else if(strcmp(k, "CO") == 0 || strcmp(k, "CON") == 0){
+ for(; v[0]; v++){
+ if(v[0] == '(' && v[1] <= '9' && v[1] >= '0'){ /* "(NN)": numeric reference into id3genres */
+ int i = atoi(&v[1]);
+ if(i < Numgenre)
+ txtcb(ctx, Tgenre, id3genres[i]);
+ for(v++; v[0] && v[0] != ')'; v++); /* advance to the closing ')' or the end of the string */
+ v--; /* compensate for the v++ of the outer loop */
+ }else if(v[0] != '(' && v[0] != ')'){ /* free-form genre text: report the rest of the string */
+ txtcb(ctx, Tgenre, v);
+ break;
+ }
+ }
+ }else if(strcmp(k, "XXX") == 0 && strncmp(v, "REPLAYGAIN_", 11) == 0){
+ int type = -1;
+ v += 11;
+ if(strncmp(v, "TRACK_", 6) == 0){
+ v += 6;
+ if(strcmp(v, "GAIN") == 0)
+ type = Ttrackgain;
+ else if(strcmp(v, "PEAK") == 0)
+ type = Ttrackpeak;
+ }else if(strncmp(v, "ALBUM_", 6) == 0){
+ v += 6;
+ if(strcmp(v, "GAIN") == 0)
+ type = Talbumgain;
+ else if(strcmp(v, "PEAK") == 0)
+ type = Talbumpeak;
+ }
+ if(type >= 0)
+ txtcb(ctx, type, v+5); /* v+5 skips "GAIN"/"PEAK" and the NUL that ended the strcmp match */
+ else
+ return 0;
+ }else
+ return 0;
+ return 1;
+}
+/* Parses the sz bytes of an RVA2 frame at tag and reports the master-volume gain and peak for "track" or "album". */
+static int
+rva2(Tagctx *ctx, char *tag, int sz)
+{
+	uchar *b, *end;
+
+	if((b = memchr(tag, 0, sz)) == nil) /* the frame starts with a NUL-terminated identification */
+		return -1;
+	b++;
+	for(end = (uchar*)tag+sz; b+4 < end; b += 5){
+		int type = b[0], va16 = b[1]<<8 | b[2];
+		float peak, va;
+		/* the adjustment is a signed 16-bit fixed-point value, 512 units per dB */
+		va = (float)(va16 >= 0x8000 ? va16 - 0x10000 : va16) / 512.0f;
+		if(b[3] == 24 && b+6 < end){ /* b[3] is the peak width in bits; wider peaks must fit too */
+			peak = (float)(b[4]<<16 | b[5]<<8 | b[6]) / 32768.0f;
+			b += 2;
+		}else if(b[3] == 16 && b+5 < end){
+			peak = (float)(b[4]<<8 | b[5]) / 32768.0f;
+			b += 1;
+		}else if(b[3] == 8){
+			peak = (float)b[4] / 32768.0f;
+		}else
+			return -1;
+
+		if(type == 1){ /* master volume */
+			char vas[16], peaks[16];
+			snprint(vas, sizeof(vas), "%+.5f dB", va);
+			snprint(peaks, sizeof(peaks), "%.5f", peak);
+			vas[sizeof(vas)-1] = 0;
+			peaks[sizeof(peaks)-1] = 0;
+
+			if(strcmp((char*)tag, "track") == 0){
+				txtcb(ctx, Ttrackgain, vas);
+				txtcb(ctx, Ttrackpeak, peaks);
+			}else if(strcmp((char*)tag, "album") == 0){
+				txtcb(ctx, Talbumgain, vas);
+				txtcb(ctx, Talbumpeak, peaks);
+			}
+			break;
+		}
+	}
+	return 0;
+}
+/* Removes, in place, each 0x00 that sits between 0xff and a following 111xxxxx byte; returns the new length. */
+static int
+resync(uchar *b, int sz)
+{
+	uchar *p;
+
+	if(sz < 4)
+		return sz;
+	for(p = b; p-b < sz-2; p++){
+		if(p[0] != 0xff || p[1] != 0x00 || (p[2] & 0xe0) != 0xe0)
+			continue;
+		memmove(p+1, p+2, sz-(p-b)-2);
+		sz--;
+	}
+	return sz;
+}
+/* Tagread filter: drops stuffed 0x00 bytes from buf and shrinks *sz; returns the index at which scanning stopped. */
+static int
+unsyncread(void *buf, int *sz)
+{
+	uchar *b = buf;
+	int i;
+
+	for(i = 0; i < *sz; i++){
+		if(b[i] != 0xff)
+			continue;
+		/* stop when the bytes needed to decide are not in the buffer */
+		if(i+1 >= *sz || (b[i+1] == 0x00 && i+2 >= *sz))
+			break;
+		if(b[i+1] == 0x00 && (b[i+2] & 0xe0) == 0xe0){
+			memmove(&b[i+1], &b[i+2], *sz-i-2);
+			(*sz)--;
+		}
+	}
+	return i;
+}
+/* Handles a non-text frame: reports APIC/PIC cover art and RVA2 replay gain, then seeks to the end of the frame. */
+static int
+nontext(Tagctx *ctx, uchar *d, int tsz, int unsync)
+{
+	int n, offset;
+	char *b, *tag;
+	Tagread f = unsync ? unsyncread : nil; /* the image consumer has to undo unsynchronisation itself */
+
+	tag = ctx->buf;
+	n = 0;
+	if(strcmp((char*)d, "APIC") == 0){
+		offset = ctx->seek(ctx, 0, 1);
+		if((n = ctx->read(ctx, tag, 256)) == 256){ /* APIC mime and description should fit */
+			b = tag + 1; /* mime type */
+			tag[255] = 0; /* bounds strlen() below; the scan never looks past index 254 */
+			for(n = 1 + strlen(b) + 2; n < 253; n++){
+				if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */
+					if(tag[n] == 0){
+						n++;
+						break;
+					}
+				}else if(tag[n] == 0 && tag[n+1] == 0 && tag[n+2] == 0){
+					n += 3;
+					break;
+				}
+			}
+			tagscallcb(ctx, Timage, b, offset+n, tsz-n, f);
+			n = 256;
+		}
+	}else if(strcmp((char*)d, "PIC") == 0){
+		offset = ctx->seek(ctx, 0, 1);
+		if((n = ctx->read(ctx, tag, 256)) == 256){ /* PIC description should fit */
+			b = tag + 1; /* three-character image format, followed directly by the picture type */
+			for(n = 5; n < 253; n++){
+				if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */
+					if(tag[n] == 0){
+						n++;
+						break;
+					}
+				}else if(tag[n] == 0 && tag[n+1] == 0 && tag[n+2] == 0){
+					n += 3;
+					break;
+				}
+			}
+			/* compare exactly three bytes: the format is not NUL-terminated */
+			tagscallcb(ctx, Timage, memcmp(b, "JPG", 3) == 0 ? "image/jpeg" : "image/png", offset+n, tsz-n, f);
+			n = 256;
+		}
+	}else if(strcmp((char*)d, "RVA2") == 0 && tsz >= 6+5){
+		/* replay gain. 6 = "track\0", 5 = other */
+		if(ctx->bufsz >= tsz && (n = ctx->read(ctx, tag, tsz)) == tsz)
+			rva2(ctx, tag, unsync ? resync((uchar*)tag, n) : n);
+	}
+	return ctx->seek(ctx, tsz-n, 1) < 0 ? -1 : 0; /* n bytes were consumed; this may seek backwards */
+}
+/* Reads one text frame of tsz bytes, converts it to UTF-8 and passes it to v2cb(). Returns -1 on a short read, else 0. */
+static int
+text(Tagctx *ctx, uchar *d, int tsz, int unsync)
+{
+	char *b, *tag;
+
+	/* frames that are empty or do not fit into the buffer are skipped */
+	if(tsz < 1 || ctx->bufsz < tsz+1){
+		ctx->seek(ctx, tsz > 0 ? tsz : 0, 1);
+		return 0;
+	}
+	/* place the data at the end to make best effort at charset conversion */
+	tag = &ctx->buf[ctx->bufsz - tsz - 1];
+	if(ctx->read(ctx, tag, tsz) != tsz)
+		return -1;
+
+	if(unsync)
+		tsz = resync((uchar*)tag, tsz);
+
+	tag[tsz] = 0;
+	b = &tag[1]; /* tag[0] is the encoding byte; the text is the remaining tsz-1 bytes */
+
+	switch(tag[0]){
+	case 0: /* iso-8859-1 */
+		if(iso88591toutf8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz-1) > 0)
+			v2cb(ctx, (char*)d, ctx->buf);
+		break;
+	case 1: /* utf-16 */
+	case 2:
+		if(utf16to8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz-1) > 0)
+			v2cb(ctx, (char*)d, ctx->buf);
+		break;
+	case 3: /* utf-8 */
+		if(*b)
+			v2cb(ctx, (char*)d, b);
+		break;
+	}
+
+	return 0;
+}
+/* Returns non-zero if the 10 bytes at d look like an ID3v2 header: "ID3" version[2] flags[1] size[4]. */
+static int
+isid3(uchar *d)
+{
+	if(memcmp(d, "ID3", 3) != 0)
+		return 0;
+	if(d[3] == 0xff || d[4] == 0xff) /* neither version byte may be 0xff */
+		return 0;
+	/* every size byte must have its top bit clear */
+	return ((d[6] | d[7] | d[8] | d[9]) & 0x80) == 0;
+}
+
+static const uchar bitrates[4][4][16] = {
+ {
+ {0},
+ {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2.5 III */
+ {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2.5 II */
+ {0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2.5 I */
+ },
+ { {0}, {0}, {0}, {0} },
+ {
+ {0},
+ {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2 III */
+ {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2 II */
+ {0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2 I */
+ },
+ {
+ {0},
+ {0, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 0}, /* v1 III */
+ {0, 16, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 0}, /* v1 II */
+ {0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 0}, /* v1 I */
+ }
+};
+
+static const uint samplerates[4][4] = {
+ {11025, 12000, 8000, 0},
+ { 0, 0, 0, 0},
+ {22050, 24000, 16000, 0},
+ {44100, 48000, 32000, 0},
+};
+
+static const int chans[] = {2, 2, 2, 1};
+/* samples per frame, indexed like bitrates[version][layer]: row 0 is v2.5, row 1 is unused, then v2 and v1 */
+static const int samplesframe[4][4] = {
+	{0,  576, 1152, 384},
+	{0,    0,    0,   0},
+	{0,  576, 1152, 384},
+	{0, 1152, 1152, 384},
+};
+/* Parses the MPEG audio frame at the current position and derives bitrate, sample rate, channels and duration. */
+static void
+getduration(Tagctx *ctx, int offset)
+{
+	uvlong n, framelen, samplespf;
+	uchar *b;
+	uint x;
+	int xversion, xlayer, xbitrate;
+
+	if(ctx->read(ctx, ctx->buf, 64) != 64)
+		return;
+
+	x = beuint((uchar*)ctx->buf); /* the 32-bit frame header */
+	xversion = x >> 19 & 3;
+	xlayer = x >> 17 & 3;
+	xbitrate = x >> 12 & 0xf;
+	ctx->bitrate = 2000*(int)bitrates[xversion][xlayer][xbitrate];
+	samplespf = samplesframe[xversion][xlayer];
+
+	ctx->samplerate = samplerates[xversion][x >> 10 & 3];
+	ctx->channels = chans[x >> 6 & 3];
+
+	if(ctx->samplerate > 0){
+		framelen = (uvlong)144*ctx->bitrate / ctx->samplerate;
+		if((x & (1<<9)) != 0) /* padding */
+			framelen += xlayer == 3 ? 4 : 1; /* for I it's 4 bytes */
+
+		if(memcmp(&ctx->buf[0x24], "Info", 4) == 0 || memcmp(&ctx->buf[0x24], "Xing", 4) == 0){
+			b = (uchar*)ctx->buf + 0x28;
+			x = beuint(b); b += 4;
+			if((x & 1) != 0){ /* number of frames is set */
+				n = beuint(b); b += 4;
+				ctx->duration = n * samplespf * 1000 / ctx->samplerate;
+			}
+
+			if(ctx->duration == 0 && (x & 2) != 0 && framelen > 0){ /* file size is set */
+				n = beuint(b);
+				ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate;
+			}
+		}else if(memcmp(&ctx->buf[0x24], "VBRI", 4) == 0){
+			n = beuint((uchar*)&ctx->buf[0x32]); /* number of frames */
+			ctx->duration = n * samplespf * 1000 / ctx->samplerate;
+			if(ctx->duration == 0 && framelen > 0){
+				/* the byte count sits 10 bytes into the header, after "VBRI", version, delay and quality */
+				n = beuint((uchar*)&ctx->buf[0x2e]);
+				ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate;
+			}
+		}
+	}
+
+	if(ctx->bitrate > 0 && ctx->duration == 0) /* worst case -- use real file size instead */
+		ctx->duration = (ctx->seek(ctx, 0, 2) - offset)/(ctx->bitrate / 1000) * 8;
+}
+/* Parses ID3v2.2-2.4 tags at the start of an MP3 (several in a row if present), then the first audio frame. Returns 0 or -1. */
+int
+tagid3v2(Tagctx *ctx)
+{
+	int sz, exsz, framesz;
+	int ver, unsync, offset;
+	uchar d[10], *b;
+
+	if(ctx->read(ctx, d, sizeof(d)) != sizeof(d))
+		return -1;
+	if(!isid3(d)){ /* no tags, but the stream information is there */
+		if(d[0] != 0xff || (d[1] & 0xe0) != 0xe0)
+			return -1;
+		ctx->seek(ctx, -(int)sizeof(d), 1);
+		getduration(ctx, 0);
+		return 0;
+	}
+
+header:
+	ver = d[3];
+	unsync = d[5] & (1<<7);
+	sz = synchsafe(&d[6]);
+
+	if(ver == 2 && (d[5] & (1<<6)) != 0) /* compression */
+		return -1;
+
+	if(ver > 2){
+		if((d[5] & (1<<4)) != 0) /* footer */
+			sz -= 10;
+		if((d[5] & (1<<6)) != 0){ /* skip extended header */
+			if(ctx->read(ctx, d, 4) != 4)
+				return -1;
+			/* v2.3: plain size that excludes these 4 bytes; v2.4: synchsafe size that includes them */
+			exsz = (ver == 3) ? beuint(d) : synchsafe(d)-4;
+			if(exsz < 0 || ctx->seek(ctx, exsz, 1) < 0)
+				return -1;
+			sz -= 4+exsz;
+		}
+	}
+
+	framesz = (ver >= 3) ? 10 : 6;
+	for(; sz > framesz;){
+		int tsz, frameunsync;
+
+		if(ctx->read(ctx, d, framesz) != framesz)
+			return -1;
+		sz -= framesz;
+		/* return on padding */
+		if(memcmp(d, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", framesz) == 0)
+			break;
+		if(ver >= 3){
+			tsz = (ver == 3) ? beuint(&d[4]) : synchsafe(&d[4]);
+			if(tsz < 0 || tsz > sz)
+				break;
+			frameunsync = d[9] & (1<<1);
+			d[4] = 0; /* terminate the 4-character frame id */
+
+			if((d[9] & 0x0c) != 0){ /* compression & encryption */
+				ctx->seek(ctx, tsz, 1);
+				sz -= tsz;
+				continue;
+			}
+			if(ver == 4 && (d[9] & 1<<0) != 0 && tsz >= 4){ /* skip data length indicator */
+				ctx->seek(ctx, 4, 1);
+				sz -= 4;
+				tsz -= 4;
+			}
+		}else{
+			tsz = beuint(&d[3]) >> 8;
+			if(tsz > sz)
+				return -1;
+			frameunsync = 0;
+			d[3] = 0; /* terminate the 3-character frame id */
+		}
+		sz -= tsz;
+
+		if(d[0] == 'T' && text(ctx, d, tsz, unsync || frameunsync) != 0)
+			return -1;
+		else if(d[0] != 'T' && nontext(ctx, d, tsz, unsync || frameunsync) != 0)
+			return -1;
+	}
+
+	/* skip what is left of the tag, then scan up to 2048 bytes for another ID3 header or a frame sync */
+	offset = ctx->seek(ctx, sz, 1);
+	sz = ctx->bufsz <= 2048 ? ctx->bufsz : 2048;
+	b = nil;
+	for(exsz = 0; exsz < 2048; exsz += sz){
+		if(ctx->read(ctx, ctx->buf, sz) != sz)
+			break;
+		for(b = (uchar*)ctx->buf; (b = memchr(b, 'I', sz - 1 - ((char*)b - ctx->buf))) != nil; b++){
+			ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0);
+			if(ctx->read(ctx, d, sizeof(d)) != sizeof(d))
+				return 0;
+			if(isid3(d))
+				goto header;
+		}
+		if((b = memchr(ctx->buf, 0xff, sz-1)) != nil && (b[1] & 0xe0) == 0xe0){
+			offset = ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0);
+			break;
+		}
+	}
+	if(b != nil)
+		getduration(ctx, offset);
+
+	return 0;
+}
--- /dev/null
+++ b/src/m4a.c
@@ -1,0 +1,153 @@
+/* http://wiki.multimedia.cx/?title=QuickTime_container */
+/* https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html */
+#include "tagspriv.h"
+
+#define beuint(d) (uint)((d)[0]<<24 | (d)[1]<<16 | (d)[2]<<8 | (d)[3]<<0)
+#define beuint16(d) (ushort)((d)[0]<<8 | (d)[1]<<0)
+/* Walks the atoms of an M4A file and reports the tags, cover art and stream properties it finds. Returns 0 or -1. */
+int
+tagm4a(Tagctx *ctx)
+{
+	uvlong duration, timescale;
+	uchar *d;
+	int sz, type, dtype, i, skip, n;
+
+	d = (uchar*)ctx->buf;
+	/* 4 bytes for atom size, 4 for type, 4 for data - expect "ftyp" to come first */
+	if(ctx->read(ctx, d, 4+4+4) != 4+4+4 || memcmp(d+4, "ftypM4A ", 8) != 0)
+		return -1;
+	sz = beuint(d) - 12; /* 12 bytes of "ftyp" are consumed; skip exactly to the next atom */
+	if(sz < 0)
+		return -1;
+
+	for(;;){
+		if(ctx->seek(ctx, sz, 1) < 0)
+			return -1;
+		if(ctx->read(ctx, d, 4) != 4) /* size */
+			break;
+		sz = beuint(d);
+		if(sz == 0)
+			continue;
+		if(ctx->read(ctx, d, 4) != 4) /* type */
+			return -1;
+		if(sz < 8)
+			continue;
+		d[4] = 0;
+
+		if(memcmp(d, "meta", 4) == 0){ /* skip 4 more bytes, then descend */
+			sz = 4;
+			continue;
+		}else if( /* containers: descend into them instead of skipping */
+			memcmp(d, "moov", 4) == 0 ||
+			memcmp(d, "udta", 4) == 0 ||
+			memcmp(d, "ilst", 4) == 0 ||
+			memcmp(d, "trak", 4) == 0 ||
+			memcmp(d, "mdia", 4) == 0 ||
+			memcmp(d, "minf", 4) == 0 ||
+			memcmp(d, "stbl", 4) == 0){
+			sz = 0;
+			continue;
+		}else if(memcmp(d, "stsd", 4) == 0){
+			sz -= 8;
+			if(ctx->read(ctx, d, 8) != 8)
+				return -1;
+			sz -= 8;
+			for(i = beuint(&d[4]); i > 0 && sz > 0; i--){
+				if(ctx->read(ctx, d, 8) != 8) /* size + format */
+					return -1;
+				sz -= 8;
+				skip = beuint(d) - 8;
+				if(memcmp(&d[4], "mp4a", 4) == 0){ /* audio */
+					n = 6+2 + 2+4+2 + 2+2 + 2+2 + 4; /* read a bunch at once */
+					/* reserved+id, ver+rev+vendor, channels+bps, ?+?, sample rate */
+					if(ctx->read(ctx, d, n) != n)
+						return -1;
+					skip -= n;
+					sz -= n;
+					ctx->channels = beuint16(&d[16]);
+					ctx->samplerate = beuint(&d[24])>>16;
+				}
+				if(ctx->seek(ctx, skip, 1) < 0)
+					return -1;
+				sz -= skip;
+			}
+			continue;
+		}
+		sz -= 8;
+		type = -1;
+		if(memcmp(d, "\251nam", 4) == 0)
+			type = Ttitle;
+		else if(memcmp(d, "\251alb", 4) == 0)
+			type = Talbum;
+		else if(memcmp(d, "\251ART", 4) == 0)
+			type = Tartist;
+		else if(memcmp(d, "\251gen", 4) == 0 || memcmp(d, "gnre", 4) == 0)
+			type = Tgenre;
+		else if(memcmp(d, "\251day", 4) == 0)
+			type = Tdate;
+		else if(memcmp(d, "covr", 4) == 0)
+			type = Timage;
+		else if(memcmp(d, "trkn", 4) == 0)
+			type = Ttrack;
+		else if(memcmp(d, "mdhd", 4) == 0){
+			if(ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= 4;
+			duration = timescale = 0;
+			if(d[0] == 0){ /* version 0 */
+				if(ctx->read(ctx, d, 16) != 16)
+					return -1;
+				sz -= 16;
+				timescale = beuint(&d[8]);
+				duration = beuint(&d[12]);
+			}else if(d[0] == 1){ /* version 1 */
+				if(ctx->read(ctx, d, 28) != 28)
+					return -1;
+				sz -= 28;
+				timescale = beuint(&d[16]);
+				duration = (uvlong)beuint(&d[20])<<32 | beuint(&d[24]);
+			}
+			ctx->duration = timescale > 0 ? duration*1000/timescale : 0; /* scale first to keep the ms */
+			continue;
+		}
+
+		if(type < 0)
+			continue;
+
+		if(ctx->seek(ctx, 8, 1) < 0) /* skip size and "data" */
+			return -1;
+		sz -= 8;
+		if(ctx->read(ctx, d, 8) != 8) /* read data type and 4 bytes of whatever else */
+			return -1;
+		sz -= 8;
+		d[0] = 0;
+		dtype = beuint(d);
+
+		if(type == Ttrack){
+			if(ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= 4;
+			snprint((char*)d, ctx->bufsz, "%d", beuint(d));
+			txtcb(ctx, type, d);
+		}else if(type == Tgenre && dtype != 1){ /* numeric genre; text genres fall through to dtype 1 */
+			if(ctx->read(ctx, d, 2) != 2)
+				return -1;
+			sz -= 2;
+			if((i = d[1]-1) >= 0 && i < Numgenre)
+				txtcb(ctx, type, id3genres[i]);
+		}else if(dtype == 1){ /* text */
+			if(sz >= ctx->bufsz) /* skip tags that can't fit into memory. ">=" because of '\0' */
+				continue;
+			if(ctx->read(ctx, d, sz) != sz)
+				return -1;
+			d[sz] = 0;
+			txtcb(ctx, type, d);
+			sz = 0;
+		}else if(type == Timage && dtype == 13) /* jpeg cover image */
+			tagscallcb(ctx, Timage, "image/jpeg", ctx->seek(ctx, 0, 1), sz, nil);
+		else if(type == Timage && dtype == 14) /* png cover image */
+			tagscallcb(ctx, Timage, "image/png", ctx->seek(ctx, 0, 1), sz, nil);
+	}
+
+	return 0;
+}
--- /dev/null
+++ b/src/mkfile
@@ -1,0 +1,22 @@
+</$objtype/mkfile
+LIB=/$objtype/lib/libtags.a
+
+OFILES=\
+ 8859.$O\
+ flac.$O\
+ id3genres.$O\
+ id3v1.$O\
+ id3v2.$O\
+ m4a.$O\
+ tags.$O\
+ utf16.$O\
+ vorbis.$O\
+
+HFILES=\
+ /sys/include/tags.h\
+ tagspriv.h\
+
+/sys/include/%.h: %.h
+ cp $stem.h /sys/include/$stem.h
+
+</sys/src/cmd/mksyslib
--- /dev/null
+++ b/src/mkfile.nix
@@ -1,0 +1,21 @@
+LIB=libtags.a
+
+<$MKRULES/defaults
+
+OFILES=\
+ 8859.$O\
+ flac.$O\
+ id3genres.$O\
+ id3v1.$O\
+ id3v2.$O\
+ m4a.$O\
+ tags.$O\
+ utf16.$O\
+ vorbis.$O\
+
+%.$O: tagspriv.h
+
+HFILES=\
+ tags.h\
+
+<$MKRULES/mklib
--- /dev/null
+++ b/src/tags.c
@@ -1,0 +1,61 @@
+#include "tagspriv.h"
+
+typedef struct Getter Getter;
+
+struct Getter
+{
+ int (*f)(Tagctx *ctx);
+ const char *ext;
+ int extlen;
+ int format;
+};
+
+extern int tagvorbis(Tagctx *ctx);
+extern int tagflac(Tagctx *ctx);
+extern int tagid3v2(Tagctx *ctx);
+extern int tagid3v1(Tagctx *ctx);
+extern int tagm4a(Tagctx *ctx);
+
+static const Getter g[] =
+{
+ {tagid3v2, ".mp3", 4, Fmp3},
+ {tagid3v1, ".mp3", 4, Fmp3},
+ {tagvorbis, ".ogg", 4, Fogg},
+ {tagflac, ".flac", 5, Fflac},
+ {tagm4a, ".m4a", 4, Fm4a},
+};
+/* Marks tag "type" as found in ctx->found, forwards it to the user's tag callback and counts it in ctx->num. */
+void
+tagscallcb(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f)
+{
+ ctx->found |= 1<<type;
+ ctx->tag(ctx, type, s, offset, size, f);
+ ctx->num++;
+}
+/* Runs every parser whose extension matches ctx->filename (all of them if it is nil). Returns 0 if any reported a tag. */
+int
+tagsget(Tagctx *ctx)
+{
+	int i, len, res;
+
+	/* enough for having an extension */
+	len = 0;
+	if(ctx->filename != nil && (len = strlen(ctx->filename)) < 5)
+		return -1;
+	ctx->channels = ctx->samplerate = ctx->bitrate = ctx->duration = 0;
+	ctx->found = 0;
+	ctx->format = -1;
+	res = -1;
+	for(i = 0; i < (int)(sizeof(g)/sizeof(g[0])); i++){
+		/* extensions are compared case-insensitively so that e.g. "SONG.MP3" is recognised too */
+		if(ctx->filename != nil && cistrcmp(&ctx->filename[len-g[i].extlen], g[i].ext) != 0)
+			continue;
+		ctx->num = 0; /* counts the tags reported by this parser alone */
+		if(g[i].f(ctx) == 0 && ctx->num > 0){
+			res = 0;
+			ctx->format = g[i].format;
+		}
+		ctx->seek(ctx, 0, 0); /* rewind for the next parser */
+	}
+	return res;
+}
--- /dev/null
+++ b/src/tags.h
@@ -1,0 +1,78 @@
+#ifndef __unix__
+#pragma lib "libtags.a"
+#endif
+
+typedef struct Tagctx Tagctx;
+typedef int (*Tagread)(void *buf, int *cnt);
+
+/* Tag type. */
+enum
+{
+ Tartist,
+ Talbum,
+ Ttitle,
+ Tdate, /* "2014", "2015/02/01", but the year goes first */
+ Ttrack, /* "1", "01", "1/4", but the track number goes first */
+ Talbumgain,
+ Talbumpeak,
+ Ttrackgain,
+ Ttrackpeak,
+ Tgenre,
+ Timage,
+};
+
+/* Format of the audio file. */
+enum
+{
+ Fmp3,
+ Fogg,
+ Fflac,
+ Fm4a,
+};
+
+/* Tag parser context. You need to set it properly before parsing an audio file using libtags. */
+struct Tagctx
+{
+ /* Set it to the filename. Doesn't have to be a full path, but extension must be there. */
+ const char *filename;
+
+ /* Read function. This is what libtags uses to read the file. */
+ int (*read)(Tagctx *ctx, void *buf, int cnt);
+
+ /* Seek function. This is what libtags uses to seek through the file. */
+ int (*seek)(Tagctx *ctx, int offset, int whence);
+
+ /* Callback that is used by libtags to inform about the tags of a file.
+ * "type" is the tag's type (Tartist, ...). "s" is the null-terminated value of the tag; for
+ * Timage it is the MIME type of the cover. "offset" and "size" define the placement and size of
+ * the image cover ("type" = Timage) inside the file. "f" is not NULL in case reading the image
+ * cover requires additional operations on the data, in which case you need to read the image
+ * cover as a stream and call "f" to apply these operations on the contents read.
+ */
+ void (*tag)(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f);
+
+ /* Auxiliary data. Not used by libtags. */
+ void *aux;
+
+ /* Memory buffer to work in. */
+ char *buf;
+
+ /* Size of the buffer. Must be at least 256 bytes. */
+ int bufsz;
+
+ /* Here goes the stuff libtags sets. It should be accessed after tagsget() returns.
+ * A value of 0 means it's undefined.
+ */
+ int channels; /* Number of channels. */
+ int samplerate; /* Hz */
+ int bitrate; /* Bitrate, bits/s. */
+ int duration; /* ms */
+ int format; /* Fmp3, Fogg, Fflac, Fm4a */
+
+ /* Private, don't touch. */
+ int found;
+ int num;
+};
+
+/* Parse the file using this function. Returns 0 on success. */
+extern int tagsget(Tagctx *ctx);
--- /dev/null
+++ b/src/tagspriv.h
@@ -1,0 +1,49 @@
+#ifdef __unix__
+#define _DEFAULT_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <strings.h>
+#define snprint snprintf
+#define cistrcmp strcasecmp
+#define nil NULL
+typedef unsigned char uchar;
+typedef unsigned long long uvlong;
+#else
+#include <u.h>
+#include <libc.h>
+#endif
+#include "tags.h"
+
+enum
+{
+ Numgenre = 192, /* number of entries in id3genres[] */
+};
+
+extern const char *id3genres[Numgenre];
+
+/*
+ * Converts (to UTF-8) at most sz bytes of src and writes them to the out buffer.
+ * Returns the number of bytes converted.
+ * You need sz*2+1 bytes for the out buffer to be completely safe.
+ */
+int iso88591toutf8(uchar *out, int osz, const uchar *src, int sz);
+
+/*
+ * Converts (to UTF-8) at most sz bytes of UTF-16 src and writes the NUL-terminated result to out (osz bytes).
+ * Returns the number of src bytes consumed or < 0 in case of error (mismatched surrogates).
+ * You need sz*4+1 bytes for the out buffer to be completely safe.
+ * UTF-16 defaults to big endian if there is no BOM.
+ */
+int utf16to8(uchar *out, int osz, const uchar *src, int sz);
+
+/*
+ * This one is common for both vorbis.c and flac.c.
+ * It maps the comment key k (case-insensitively) to a tag type and reports the value v through txtcb().
+ * Returns nothing: empty values and keys that are not recognized are silently ignored.
+ */
+void cbvorbiscomment(Tagctx *ctx, char *k, char *v);
+
+void tagscallcb(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f);
+/* txtcb reports a text tag: tagscallcb() with s as the string and offset, size and f passed as 0, 0 and nil. */
+#define txtcb(ctx, type, s) tagscallcb(ctx, type, (const char*)s, 0, 0, nil)
--- /dev/null
+++ b/src/utf16.c
@@ -1,0 +1,59 @@
+/* Horror stories: http://en.wikipedia.org/wiki/UTF-16 */
+#include "tagspriv.h"
+
+/* Reads the 16-bit code unit at s; the byte order is selected by the local variable "be". */
+#define rchr(s) (be ? ((s)[0]<<8 | (s)[1]) : ((s)[1]<<8 | (s)[0]))
+
+/* UTF-8 lead byte markers, indexed by the length of the encoded sequence. */
+static const uchar mark[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};
+
+/*
+ * Converts at most sz bytes of UTF-16 at s (big endian unless a BOM says otherwise) to NUL-terminated UTF-8 in o (osz bytes).
+ * Returns the number of bytes of s consumed, or -1 on an unpaired surrogate or if osz < 1.
+ */
+int
+utf16to8(uchar *o, int osz, const uchar *s, int sz)
+{
+	int i = 0, be = 1, c, c2, wr, j;
+
+	if(osz < 1)
+		return -1; /* no room even for the terminator */
+	if(sz >= 2 && s[0] == 0xfe && s[1] == 0xff)
+		i += 2;
+	else if(sz >= 2 && s[0] == 0xff && s[1] == 0xfe){
+		be = 0;
+		i += 2;
+	}
+
+	for(; i < sz-1 && osz > 1;){
+		c = rchr(&s[i]);
+		i += 2;
+		if(c >= 0xd800 && c <= 0xdbff){ /* high surrogate: a low one must follow */
+			if(i >= sz-1)
+				return -1;
+			c2 = rchr(&s[i]);
+			if(c2 < 0xdc00 || c2 > 0xdfff)
+				return -1;
+			/* add, don't OR: (c - 0xd800)<<10 can itself have bit 0x10000 set */
+			c = 0x10000 + ((c - 0xd800)<<10 | (c2 - 0xdc00));
+			i += 2;
+		}else if(c >= 0xdc00 && c <= 0xdfff)
+			return -1; /* low surrogate without a preceding high one */
+
+		wr = c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4;
+		osz -= wr;
+		if(osz < 1)
+			break; /* keep one byte for the terminator */
+
+		o += wr; /* fill backwards: continuation bytes first, lead byte last */
+		for(j = wr; j > 1; j--){
+			*(--o) = (c & 0x3f) | 0x80;
+			c >>= 6;
+		}
+		*(--o) = c | mark[wr];
+		o += wr;
+	}
+
+	*o = 0;
+	return i;
+}
--- /dev/null
+++ b/src/vorbis.c
@@ -1,0 +1,123 @@
+/*
+ * https://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810005
+ * https://wiki.xiph.org/VorbisComment
+ */
+#include "tagspriv.h"
+
+#define leuint(d) ((uint)((uchar*)(d))[3]<<24 | (uint)((uchar*)(d))[2]<<16 | (uint)((uchar*)(d))[1]<<8 | (uint)((uchar*)(d))[0]) /* little-endian 32-bit load; bytes are widened to uint before shifting so <<24 cannot overflow int */
+
+/* Reports a Vorbis comment (key k, value v) as a tag; empty values and unknown keys are ignored. */
+void
+cbvorbiscomment(Tagctx *ctx, char *k, char *v)
+{
+	static const struct {
+		const char *name; /* comment key, matched case-insensitively */
+		int type; /* tag type handed to txtcb() */
+	} keys[] = {
+		{"album", Talbum}, {"title", Ttitle},
+		{"artist", Tartist}, {"performer", Tartist},
+		{"tracknumber", Ttrack}, {"date", Tdate},
+		{"replaygain_track_peak", Ttrackpeak}, {"replaygain_track_gain", Ttrackgain},
+		{"replaygain_album_peak", Talbumpeak}, {"replaygain_album_gain", Talbumgain},
+		{"genre", Tgenre},
+	};
+	int n;
+
+	if(*v == 0)
+		return;
+	for(n = 0; n < (int)(sizeof(keys)/sizeof(keys[0])); n++)
+		if(cistrcmp(k, keys[n].name) == 0){
+			txtcb(ctx, keys[n].type, v);
+			return;
+		}
+}
+
+/*
+ * Parses an Ogg/Vorbis stream: sets channels, samplerate, bitrate and duration in ctx and reports
+ * the comments through cbvorbiscomment(). Returns 0 on success, -1 on malformed data or I/O failure.
+ */
+int
+tagvorbis(Tagctx *ctx)
+{
+	char *v;
+	uchar *d, h[4];
+	int sz, numtags, i, npages, nsegs;
+
+	d = (uchar*)ctx->buf;
+	/* need to find vorbis frame with type=3 */
+	for(npages = 0; npages < 2; npages++){ /* vorbis comment is the second header */
+		if(ctx->read(ctx, d, 27) != 27 || memcmp(d, "OggS", 4) != 0) /* page header */
+			return -1;
+
+		/* calculate the size of the packet; the extra byte read is the packet type */
+		nsegs = d[26];
+		if(ctx->read(ctx, d, nsegs+1) != nsegs+1)
+			return -1;
+		for(sz = i = 0; i < nsegs; sz += d[i++]);
+
+		if(d[nsegs] == 3) /* comment */
+			break;
+		if(d[nsegs] == 1 && sz >= 28){ /* identification */
+			if(ctx->read(ctx, d, 28) != 28)
+				return -1;
+			sz -= 28;
+			ctx->channels = d[10];
+			ctx->samplerate = leuint(&d[11]);
+			if((ctx->bitrate = leuint(&d[15])) == 0) /* maximum */
+				ctx->bitrate = leuint(&d[19]); /* nominal */
+		}
+
+		if(ctx->seek(ctx, sz-1, 1) < 0) /* skip the rest; -1 for the type byte already read */
+			return -1;
+	}
+
+	if(ctx->read(ctx, &d[1], 10) != 10 || memcmp(&d[1], "vorbis", 6) != 0)
+		return -1;
+	sz = leuint(&d[7]); /* vendor string length; negative if the top bit was set */
+	if(sz < 0 || ctx->seek(ctx, sz, 1) < 0 || ctx->read(ctx, h, 4) != 4)
+		return -1;
+	numtags = leuint(h);
+
+	for(i = 0; i < numtags; i++){
+		if(ctx->read(ctx, h, 4) != 4 || (sz = leuint(h)) < 0)
+			return -1;
+
+		if(sz >= ctx->bufsz){ /* no room for the comment plus terminator: skip it */
+			if(ctx->seek(ctx, sz, 1) < 0)
+				return -1;
+			continue;
+		}
+		if(ctx->read(ctx, ctx->buf, sz) != sz)
+			return -1;
+		ctx->buf[sz] = 0;
+
+		if((v = strchr(ctx->buf, '=')) == nil)
+			return -1;
+		*v++ = 0; /* split "KEY=value" in place */
+		cbvorbiscomment(ctx, ctx->buf, v);
+	}
+
+	/* calculate the duration: scan the end of the file backwards, in overlapping windows, for the last page */
+	if(ctx->samplerate > 0){
+		sz = ctx->bufsz <= 4096 ? ctx->bufsz : 4096;
+		for(i = sz; i < 65536+16; i += sz - 16){
+			if(ctx->seek(ctx, -i, 2) <= 0)
+				break;
+			v = ctx->buf;
+			if(ctx->read(ctx, v, sz) != sz)
+				break;
+			for(; v != nil && v < ctx->buf+sz;){
+				v = memchr(v, 'O', ctx->buf+sz - v - 14);
+				if(v != nil && v[1] == 'g' && v[2] == 'g' && v[3] == 'S' && (v[5] & 4) == 4){ /* last page */
+					uvlong g = leuint(v+6) | (uvlong)leuint(v+10)<<32;
+					ctx->duration = g * 1000 / ctx->samplerate;
+					return 0;
+				}
+				if(v != nil)
+					v++;
+			}
+		}
+	}
+
+	return 0;
+}