ref: f65dab6f653a94eb3ae9a1a9331943569d44e30e
parent: 7aea3cae937971b4ffb87ebcbfc2831e01cc037e
author: Timothy B. Terriberry <tterribe@xiph.org>
date: Tue Aug 6 07:56:30 EDT 2013
Add UTF-8 filename support to Windows. As requested here: http://www.hydrogenaudio.org/forums/index.php?showtopic=101817
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,17 +21,21 @@
libopusurl_la_LDFLAGS = -no-undefined \
-version-info @OP_LT_CURRENT@:@OP_LT_REVISION@:@OP_LT_AGE@
-if OP_ENABLE_HTTP
+noinst_PROGRAMS = examples/opusfile_example examples/seeking_example
+
+examples_opusfile_example_SOURCES = examples/opusfile_example.c
+examples_seeking_example_SOURCES = examples/seeking_example.c
+examples_opusfile_example_LDADD = libopusurl.la libopusfile.la
+examples_seeking_example_LDADD = libopusurl.la libopusfile.la
+
if OP_WIN32
+if OP_ENABLE_HTTP
libopusurl_la_SOURCES += src/wincerts.c
libopusurl_la_LIBADD += -lws2_32 -lcrypt32
endif
+examples_opusfile_example_SOURCES += examples/win32utf8.c
+examples_seeking_example_SOURCES += examples/win32utf8.c
endif
-
-noinst_PROGRAMS = examples/opusfile_example examples/seeking_example
-
-examples_opusfile_example_LDADD = libopusurl.la libopusfile.la
-examples_seeking_example_LDADD = libopusurl.la libopusfile.la
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = opusfile.pc opusurl.pc
--- a/examples/opusfile_example.c
+++ b/examples/opusfile_example.c
@@ -21,12 +21,12 @@
#include <stdlib.h>
#include <errno.h>
#include <string.h>
+#include <opusfile.h>
#if defined(_WIN32)
-/*We need the following two to set stdin/stdout to binary.*/
-# include <io.h>
-# include <fcntl.h>
+# include "win32utf8.h"
+# undef fileno
+# define fileno _fileno
#endif
-#include <opusfile.h>
static void print_duration(FILE *_fp,ogg_int64_t _nsamples,int _frac){
ogg_int64_t seconds;
@@ -138,15 +138,7 @@
int is_ssl;
int output_seekable;
#if defined(_WIN32)
-# undef fileno
-# define fileno _fileno
- /*We need to set stdin/stdout to binary mode. Damn windows.*/
- /*Beware the evil ifdef. We avoid these where we can, but this one we
- cannot.
- Don't add any more.
- You'll probably go to hell if you do.*/
- _setmode(fileno(stdin),_O_BINARY);
- _setmode(fileno(stdout),_O_BINARY);
+ win32_utf8_setup(&_argc,&_argv);
#endif
if(_argc!=2){
fprintf(stderr,"Usage: %s <file.opus>\n",_argv[0]);
@@ -289,6 +281,7 @@
print_size(stderr,bitrate,1," ");
fprintf(stderr,"bps) \r");
pcm_print_offset=pcm_offset;
+ fflush(stderr);
}
next_pcm_offset=op_pcm_tell(of);
if(pcm_offset+ret!=next_pcm_offset){
--- a/examples/seeking_example.c
+++ b/examples/seeking_example.c
@@ -22,12 +22,12 @@
#include <errno.h>
#include <math.h>
#include <string.h>
+#include <opusfile.h>
#if defined(_WIN32)
-/*We need the following two to set stdin/stdout to binary.*/
-# include <io.h>
-# include <fcntl.h>
+# include "win32utf8.h"
+# undef fileno
+# define fileno _fileno
#endif
-#include <opusfile.h>
/*Use shorts, they're smaller.*/
#if !defined(OP_FIXED_POINT)
@@ -261,15 +261,7 @@
OggOpusFile *of;
void *fp;
#if defined(_WIN32)
-# undef fileno
-# define fileno _fileno
- /*We need to set stdin/stdout to binary mode. Damn windows.*/
- /*Beware the evil ifdef. We avoid these where we can, but this one we
- cannot.
- Don't add any more.
- You'll probably go to hell if you do.*/
- _setmode(fileno(stdin),_O_BINARY);
- _setmode(fileno(stdout),_O_BINARY);
+ win32_utf8_setup(&_argc,&_argv);
#endif
if(_argc!=2){
fprintf(stderr,"Usage: %s <file.opus>\n",_argv[0]);
--- /dev/null
+++ b/examples/win32utf8.c
@@ -1,0 +1,110 @@
+#if defined(_WIN32)
+# include <stdio.h>
+# include <stdlib.h>
+# include <wchar.h>
+/*We need the following two to set stdin/stdout to binary.*/
+# include <io.h>
+# include <fcntl.h>
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+# include "win32utf8.h"
+
+/*Converts a NUL-terminated UTF-16 string to a newly allocated UTF-8 string.
+  Each UTF-16 code unit needs at most 3 bytes of output (a surrogate pair is
+   two units and needs 4), so 3 bytes per unit plus the terminator is always
+   enough room.
+  _src: The wide string to convert.
+  Return: A malloc()ed, NUL-terminated string the caller must free(), or NULL
+           if the allocation failed.*/
+static char *utf16_to_utf8(const wchar_t *_src){
+  size_t  nunits;
+  size_t  pos;
+  char   *utf8;
+  char   *out;
+  nunits=wcslen(_src);
+  utf8=(char *)malloc(sizeof(*utf8)*(3*nunits+1));
+  if(utf8==NULL)return NULL;
+  out=utf8;
+  pos=0;
+  while(pos<nunits){
+    unsigned unit;
+    unit=_src[pos++];
+    if(unit<0x80){
+      /*Plain ASCII: copied through as a single byte.*/
+      *out++=(char)unit;
+    }
+    else if(unit<0x800){
+      /*Fits in a 2-byte sequence.*/
+      *out++=(char)(0xC0|unit>>6);
+      *out++=(char)(0x80|unit&0x3F);
+    }
+    else{
+      unsigned low;
+      int      is_pair;
+      low=0;
+      is_pair=0;
+      /*A high surrogate only forms a pair if a low surrogate follows it.*/
+      if(unit>=0xD800&&unit<0xDC00&&pos<nunits){
+        low=_src[pos];
+        is_pair=low>=0xDC00&&low<0xE000;
+      }
+      if(is_pair){
+        unsigned cp;
+        /*Combine the two halves into one code point and emit 4 bytes.*/
+        cp=((unit&0x3FF)<<10|low&0x3FF)+0x10000;
+        *out++=(char)(0xF0|cp>>18);
+        *out++=(char)(0x80|cp>>12&0x3F);
+        *out++=(char)(0x80|cp>>6&0x3F);
+        *out++=(char)(0x80|cp&0x3F);
+        /*The low surrogate has been consumed as well.*/
+        pos++;
+      }
+      else{
+        /*Either an ordinary 3-byte value or an unpaired surrogate.
+          An unpaired surrogate is still written as 3 bytes (technically
+           invalid UTF-8) so that later error handling sees it and has a
+           better chance of producing a useful message.*/
+        *out++=(char)(0xE0|unit>>12);
+        *out++=(char)(0x80|unit>>6&0x3F);
+        *out++=(char)(0x80|unit&0x3F);
+      }
+    }
+  }
+  *out='\0';
+  return utf8;
+}
+
+/*Signature of CommandLineToArgvW(), which is looked up in shell32.dll at run
+   time rather than being called directly.*/
+typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
+ int *num_args);
+
+/*Overwrites the entries of (*_argv) with UTF-8 copies of the arguments parsed
+   from the process's wide-character command line.
+  Does nothing if CommandLineToArgvW() cannot be found in _shell32 or if it
+   fails.
+  The UTF-8 copies are never freed here; they are handed to the caller's
+   argument array.*/
+static void win32_utf8_replace_args(HMODULE _shell32,int *_argc,
+ const char ***_argv){
+  command_line_to_argv_w_func   parse_command_line;
+  wchar_t                     **wide_args;
+  int                           nargs;
+  int                           i;
+  /*This function is only available on Windows 2000 or later.*/
+  parse_command_line=(command_line_to_argv_w_func)GetProcAddress(_shell32,
+   "CommandLineToArgvW");
+  if(parse_command_line==NULL)return;
+  wide_args=(*parse_command_line)(GetCommandLineW(),&nargs);
+  if(wide_args==NULL)return;
+  /*There is no obvious reason for the two counts to differ, but never write
+     past the end of the caller's array.*/
+  if(nargs>*_argc)nargs=*_argc;
+  for(i=0;i<nargs;i++){
+    char *utf8;
+    utf8=utf16_to_utf8(wide_args[i]);
+    /*If the conversion failed, the original argument is left in place.*/
+    if(utf8!=NULL)(*_argv)[i]=utf8;
+  }
+  *_argc=nargs;
+  LocalFree(wide_args);
+}
+
+/*Make a best-effort attempt to support UTF-8 on Windows.
+  _argc: Points to the argument count, which is updated to the number of
+          arguments actually processed.
+  _argv: Points to the argument array, whose entries are replaced in place.*/
+void win32_utf8_setup(int *_argc,const char ***_argv){
+  HMODULE shell32;
+  /*We need to set stdin/stdout to binary mode.
+    This is unrelated to UTF-8 support, but it's platform specific and we need
+     to do it in the same places.*/
+  _setmode(_fileno(stdin),_O_BINARY);
+  _setmode(_fileno(stdout),_O_BINARY);
+  shell32=LoadLibraryA("shell32.dll");
+  if(shell32!=NULL){
+    win32_utf8_replace_args(shell32,_argc,_argv);
+    FreeLibrary(shell32);
+  }
+# if defined(CP_UTF8)
+  /*This does not work correctly in all environments (it breaks output in
+     mingw32 for me), and requires a Unicode font (e.g., when using the default
+     Raster font, even characters that are available in the font's codepage
+     won't display properly).*/
+  /*SetConsoleOutputCP(CP_UTF8);*/
+# endif
+}
+#endif
--- /dev/null
+++ b/examples/win32utf8.h
@@ -1,0 +1,9 @@
+#if !defined(_win32utf8_H)
+# define _win32utf8_H (1)
+# if defined(_WIN32)
+
+/*Make a best-effort attempt to support UTF-8 on Windows.
+  This switches stdin and stdout to binary mode and, when the wide-character
+   command line can be retrieved and parsed, replaces the entries of (*_argv)
+   with newly allocated UTF-8 copies.
+  Arguments that cannot be converted are left as they were.
+  _argc: Points to main()'s argument count; it may be lowered if fewer
+          wide-character arguments are found.
+  _argv: Points to main()'s argument array, whose entries are overwritten in
+          place.*/
+void win32_utf8_setup(int *_argc,const char ***_argv);
+
+# endif
+#endif
--- a/include/opusfile.h
+++ b/include/opusfile.h
@@ -630,6 +630,10 @@
If there is an error opening the file, nothing will be
filled in here.
\param _path The path to the file to open.
+ On Windows, this string must be UTF-8 (to allow access to
+ files whose names cannot be represented in the current
+ MBCS code page).
+ All other systems use the native character encoding.
\param _mode The mode to open the file in.
\return A stream handle to use with the callbacks, or <code>NULL</code> on
error.*/
@@ -663,6 +667,10 @@
If there is an error opening the file, nothing will be
filled in here.
\param _path The path to the file to open.
+ On Windows, this string must be UTF-8 (to allow access
+ to files whose names cannot be represented in the
+ current MBCS code page).
+ All other systems use the native character encoding.
\param _mode The mode to open the file in.
\param _stream A stream previously returned by op_fopen(), op_fdopen(),
or op_freopen().
--- a/src/stream.c
+++ b/src/stream.c
@@ -103,9 +103,124 @@
(op_close_func)fclose
};
+#if defined(_WIN32)
+# include <stddef.h>
+# include <errno.h>
+
+/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API,
+   so if we just pass the path to fopen(), then there'd be no way for a user
+   of our API to open a Unicode filename.
+  Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API.
+  This makes this API more consistent with platforms where the character set
+   used by fopen is the same as used on disk, which is generally UTF-8, and
+   with our metadata API, which always uses UTF-8.
+  _src: The NUL-terminated UTF-8 string to convert (must not be NULL).
+  Return: A newly allocated, NUL-terminated UTF-16 string that the caller must
+           free(), or NULL if the allocation failed or _src is not valid
+           UTF-8 (a stray continuation byte, a truncated or overlong
+           sequence, an encoded surrogate, or a value above U+10FFFF).*/
+static wchar_t *op_utf8_to_utf16(const char *_src){
+  wchar_t *dst;
+  size_t   len;
+  len=strlen(_src);
+  /*Worst-case output is 1 wide character per 1 input character.*/
+  dst=(wchar_t *)malloc(sizeof(*dst)*(len+1));
+  if(dst!=NULL){
+    size_t si;
+    size_t di;
+    for(di=si=0;si<len;si++){
+      int c0;
+      c0=(unsigned char)_src[si];
+      if(!(c0&0x80)){
+        /*Start byte says this is a 1-byte sequence.*/
+        dst[di++]=(wchar_t)c0;
+        continue;
+      }
+      else if(si+1<len){
+        int c1;
+        c1=(unsigned char)_src[si+1];
+        if((c1&0xC0)==0x80){
+          /*Found at least one continuation byte.*/
+          if((c0&0xE0)==0xC0){
+            wchar_t w;
+            /*Start byte says this is a 2-byte sequence.
+              The mask must be parenthesized: << binds tighter than &, so
+               without the parentheses the payload bits of c0 are lost.*/
+            w=(c0&0x1F)<<6|c1&0x3F;
+            if(w>=0x80U){
+              /*This is a 2-byte sequence that is not overlong.*/
+              dst[di++]=w;
+              si++;
+              continue;
+            }
+          }
+          else if(si+2<len){
+            int c2;
+            c2=(unsigned char)_src[si+2];
+            if((c2&0xC0)==0x80){
+              /*Found at least two continuation bytes.*/
+              if((c0&0xF0)==0xE0){
+                wchar_t w;
+                /*Start byte says this is a 3-byte sequence.*/
+                w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F;
+                if(w>=0x800U&&(w<0xD800||w>=0xE000)){
+                  /*This is a 3-byte sequence that is not overlong and not a
+                     UTF-16 surrogate pair value.*/
+                  dst[di++]=w;
+                  si+=2;
+                  continue;
+                }
+              }
+              else if(si+3<len){
+                int c3;
+                c3=(unsigned char)_src[si+3];
+                if((c3&0xC0)==0x80){
+                  /*Found at least three continuation bytes.*/
+                  if((c0&0xF8)==0xF0){
+                    opus_uint32 w;
+                    /*Start byte says this is a 4-byte sequence.
+                      All four payload fields are OR'd together; they occupy
+                       disjoint bit ranges of the code point.*/
+                    w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6|(c3&0x3F);
+                    if(w>=0x10000U&&w<0x110000U){
+                      /*This is a 4-byte sequence that is not overlong and not
+                         greater than the largest valid Unicode code point.
+                        Convert it to a surrogate pair.*/
+                      w-=0x10000;
+                      dst[di++]=(wchar_t)(0xD800+(w>>10));
+                      dst[di++]=(wchar_t)(0xDC00+(w&0x3FF));
+                      si+=3;
+                      continue;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      /*If we got here, we encountered an illegal UTF-8 sequence.*/
+      free(dst);
+      return NULL;
+    }
+    /*Every sequence emits no more wide characters than it consumed bytes.*/
+    OP_ASSERT(di<=len);
+    dst[di]='\0';
+  }
+  return dst;
+}
+
+#endif
+
void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){
FILE *fp;
+#if !defined(_WIN32) /*Elsewhere the path goes to fopen() unchanged.*/
fp=fopen(_path,_mode);
+#else /*On Windows the path and mode are UTF-8 and are opened via _wfopen().*/
+ fp=NULL; /*Stays NULL unless _wfopen() below succeeds.*/
+ if(_path==NULL||_mode==NULL)errno=EINVAL; /*The converter calls strlen().*/
+ else{
+ wchar_t *wpath;
+ wchar_t *wmode;
+ wpath=op_utf8_to_utf16(_path);
+ wmode=op_utf8_to_utf16(_mode);
+ if(wmode==NULL)errno=EINVAL; /*Invalid UTF-8 or allocation failure.*/
+ else if(wpath==NULL)errno=ENOENT; /*An unconvertible path is reported as a
+ missing file.*/
+ else fp=_wfopen(wpath,wmode);
+ free(wmode); /*free(NULL) is a no-op, so the failure paths need no special
+ handling.*/
+ free(wpath);
+ }
+#endif
if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
return fp;
}
@@ -120,7 +235,23 @@
void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode,
void *_stream){
FILE *fp;
+#if !defined(_WIN32) /*Elsewhere the path goes to freopen() unchanged.*/
fp=freopen(_path,_mode,(FILE *)_stream);
+#else /*On Windows the path and mode are UTF-8 and go through _wfreopen().*/
+ fp=NULL; /*Stays NULL unless _wfreopen() below succeeds.*/
+ if(_path==NULL||_mode==NULL)errno=EINVAL; /*The converter calls strlen().*/
+ else{
+ wchar_t *wpath;
+ wchar_t *wmode;
+ wpath=op_utf8_to_utf16(_path);
+ wmode=op_utf8_to_utf16(_mode);
+ if(wmode==NULL)errno=EINVAL; /*Invalid UTF-8 or allocation failure.*/
+ else if(wpath==NULL)errno=ENOENT; /*An unconvertible path is reported as a
+ missing file.*/
+ else fp=_wfreopen(wpath,wmode,(FILE *)_stream);
+ free(wmode); /*free(NULL) is a no-op, so the failure paths need no special
+ handling.*/
+ free(wpath);
+ }
+#endif
if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
return fp;
}