UTF-8とShift-JISの文字コードを変換するにあたって、簡単な計算では求まらないことは周知のことと思います。
そこで、簡単に変換できるように変換表を作ってみることにします。
ちなみにVisual Studio 2015で作りました。
// stdafx.h : 標準のシステム インクルード ファイルのインクルード ファイル、または
// 参照回数が多く、かつあまり変更されない、プロジェクト専用のインクルード ファイル
// を記述します。
//

#pragma once

#include "targetver.h"

#include <stdio.h>
#include <tchar.h>

// TODO: プログラムに必要な追加ヘッダーをここで参照してください
#include <windows.h>
// utf8tocp932.cpp : コンソール アプリケーションのエントリ ポイントを定義します。
//
#include "stdafx.h"
/*
 * Fixed-size holder for a short byte string: exactly 7 chars of storage.
 * NOTE(review): presumably sized for a 6-byte sequence plus a terminating
 * NUL; nothing in the visible code references this type - confirm before
 * relying on that.
 */
typedef struct
{
    char buf[7];
} utf8char;
/**
 * Converts one NUL-terminated UTF-8 string to Shift-JIS (code page 932) via
 * UTF-16 and, when every character converts exactly, prints one table row
 * to stdout in the form:
 *
 *     {{<UTF-8 bytes>}, {<Shift-JIS bytes>}}<TAB>// <Shift-JIS text>
 *
 * Nothing is printed when either conversion fails or when any character had
 * to be replaced by the default character.
 *
 * @param utf  NUL-terminated UTF-8 byte string; must not be NULL.
 */
void utf8tocp932(unsigned const char *utf)
{
    // Name the target code page explicitly: the thread's ANSI code page is
    // only Shift-JIS on Japanese-locale systems.
    const unsigned int codePageShiftJis = 932;

    const size_t utfLength = strlen((const char *)utf);
    // Include the terminating NUL so both converted buffers come back
    // NUL-terminated.
    const int utfBytes = (int)utfLength + 1;

    const int lengthUnicode = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, utfBytes, NULL, 0);
    if (!lengthUnicode) {
        return;
    }

    wchar_t *bufUnicode = new wchar_t[lengthUnicode];
    if (MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, utfBytes, bufUnicode, lengthUnicode)) {
        // The sizing call and the converting call must use identical flags,
        // length and default character, otherwise the sized buffer may not
        // match what the real conversion produces.
        const int lengthSJis = WideCharToMultiByte(codePageShiftJis, WC_NO_BEST_FIT_CHARS,
                                                   bufUnicode, lengthUnicode,
                                                   NULL, 0, "??", NULL);
        if (lengthSJis) {
            char *bufShiftJis = new char[lengthSJis];
            // Initialised so a failed call can never leave it indeterminate.
            BOOL usedDefaultChar = 0;
            const int written = WideCharToMultiByte(codePageShiftJis, WC_NO_BEST_FIT_CHARS,
                                                    bufUnicode, lengthUnicode,
                                                    bufShiftJis, lengthSJis, "??", &usedDefaultChar);
            // Emit a row only for a successful, loss-free conversion.
            if (written && !usedDefaultChar) {
                const size_t sjisLength = strlen(bufShiftJis);

                printf("{{");
                for (size_t i = 0; i < utfLength; i++) {
                    printf("0x%02x,", utf[i]);
                }
                printf("}, {");
                for (size_t i = 0; i < sjisLength; i++) {
                    printf("0x%02x,", (unsigned char)bufShiftJis[i]);
                }
                printf("}}\t// %s\n", bufShiftJis);
            }
            // Array form is required for memory obtained with new[].
            delete[] bufShiftJis;
        }
    }
    delete[] bufUnicode;
}
// Encodes one Unicode scalar value (U+0000..U+10FFFF, not a surrogate) as a
// NUL-terminated UTF-8 byte string; `out` must hold at least 5 bytes.
static void encodeUtf8(unsigned long codePoint, unsigned char *out)
{
    if (codePoint < 0x80) {
        out[0] = (unsigned char)codePoint;
        out[1] = 0x00;
    } else if (codePoint < 0x800) {
        out[0] = (unsigned char)(0xC0 | (codePoint >> 6));
        out[1] = (unsigned char)(0x80 | (codePoint & 0x3F));
        out[2] = 0x00;
    } else if (codePoint < 0x10000) {
        out[0] = (unsigned char)(0xE0 | (codePoint >> 12));
        out[1] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3F));
        out[2] = (unsigned char)(0x80 | (codePoint & 0x3F));
        out[3] = 0x00;
    } else {
        out[0] = (unsigned char)(0xF0 | (codePoint >> 18));
        out[1] = (unsigned char)(0x80 | ((codePoint >> 12) & 0x3F));
        out[2] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3F));
        out[3] = (unsigned char)(0x80 | (codePoint & 0x3F));
        out[4] = 0x00;
    }
}

/**
 * Feeds every candidate character to utf8tocp932(), which prints a table
 * row for each one that converts.
 *
 * Walks the code points from U+0020 to U+10FFFF in ascending order, which is
 * also ascending UTF-8 byte order. Only well-formed UTF-8 is generated:
 * 2-byte sequences with an 0xE0..0xEF lead byte, surrogate encodings and the
 * 5-/6-byte forms (lead bytes 0xF5..0xFD) are not valid UTF-8 and are never
 * produced, which avoids billions of conversions of invalid input.
 *
 * @return 0 always.
 */
int main()
{
    const unsigned long firstCodePoint = 0x20;     // skip C0 control characters
    const unsigned long lastCodePoint = 0x10FFFF;  // upper limit of Unicode

    for (unsigned long codePoint = firstCodePoint; codePoint <= lastCodePoint; codePoint++) {
        // DEL (U+007F) is deliberately left out of the ASCII part of the table.
        if (codePoint == 0x7F) {
            continue;
        }
        // Surrogates U+D800..U+DFFF have no valid UTF-8 encoding.
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
            continue;
        }

        unsigned char str[5];
        encodeUtf8(codePoint, str);
        utf8tocp932(str);
    }
    return 0;
}
参照
UTF8なstring入れたらShiftJISなstring出てくる関数作った
UTF-8