UTF-8 to CP932 変換表


UTF-8からShift-JISへ変換するプログラムを作ってみる

UTF-8とShift-JIS（CP932）の間の文字コード変換は、単純な計算式では求められないことはよく知られていると思います。
そこで、簡単に変換できるように、両者の対応を列挙した変換表を生成するプログラムを作ってみることにします。
ちなみにVisual Studio 2015で作りました。

// stdafx.h : 標準のシステム インクルード ファイルのインクルード ファイル、または
// 参照回数が多く、かつあまり変更されない、プロジェクト専用のインクルード ファイル
// を記述します。
//

#pragma once

#include "targetver.h"

#include <stdio.h>
#include <tchar.h>



// TODO: プログラムに必要な追加ヘッダーをここで参照してください
#include <windows.h>
// utf8tocp932.cpp : コンソール アプリケーションのエントリ ポイントを定義します。
//

#include "stdafx.h"

// Fixed-size 7-byte character buffer.
// NOTE(review): presumably sized for one UTF-8 sequence of up to 6 bytes plus
// a terminating NUL; the type is not referenced by the code visible here.
typedef struct {
	char buf[7];
} utf8char;

void utf8tocp932(unsigned const char *utf)
{
	int lenghtUnicode = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, strlen((const char *)utf) + 1, NULL, 0);
	if (lenghtUnicode) {
		wchar_t* bufUnicode = new wchar_t[lenghtUnicode];
		MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, strlen((const char *)utf) + 1, bufUnicode, lenghtUnicode);

		int lengthSJis = WideCharToMultiByte(CP_THREAD_ACP, 0, bufUnicode, -1, NULL, 0, NULL, NULL);
		if (lengthSJis) {
			char* bufShiftJis = new char[lengthSJis];
			BOOL usedDefaultChar;
			WideCharToMultiByte(CP_THREAD_ACP, WC_NO_BEST_FIT_CHARS, bufUnicode, lenghtUnicode + 1, bufShiftJis, lengthSJis, "??", &usedDefaultChar);

			if (!usedDefaultChar) {
				printf("{{");
				for (int i = 0; strlen((const char *)utf) > i; i++) {
					printf("0x%02x,", utf[i]);
				}
				printf("}, {");
				for (int i = 0; strlen(bufShiftJis) > i; i++) {
					printf("0x%02x,", (unsigned char)bufShiftJis[i]);
				}
				printf("}}\t// %s\n", bufShiftJis);
			}
			delete bufShiftJis;
		}
		delete bufUnicode;
	}
}

// Enumerates UTF-8 byte sequences in ascending byte order and passes each one
// to utf8tocp932(), which prints a table row for every sequence that converts.
//
// Only lead/continuation byte ranges that are well-formed UTF-8 (RFC 3629)
// are generated:
//   1 byte : 0x20..0x7E (printable ASCII)
//   2 bytes: C2..DF 80..BF
//   3 bytes: E0 A0..BF 80..BF, E1..EF 80..BF 80..BF
//   4 bytes: F0 90..BF .., F1..F3 80..BF .., F4 80..8F ..
// Lead bytes F5..FD (including the obsolete 5- and 6-byte forms) are not
// valid UTF-8 and can never yield a table entry, so they are not enumerated.
int main()
{
	// 1-byte sequences: printable ASCII.
	for (unsigned char chr1 = 0x20; 0x7f > chr1; chr1++) {
		unsigned char str[] = { chr1 , 0x00 };
		utf8tocp932(str);
	}

	// 2-byte sequences. The lead byte stops at 0xDF; 0xE0 and above start
	// 3-byte sequences and would be truncated here.
	for (unsigned char chr1 = 0xc2; 0xdf >= chr1; chr1++) {
		for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) {
			unsigned char str[] = { chr1 , chr2 , 0x00 };
			utf8tocp932(str);
		}
	}

	// 3-byte sequences with lead 0xE0: second byte starts at 0xA0 to exclude
	// overlong encodings.
	for (unsigned char chr2 = 0xa0; 0xbf >= chr2; chr2++) {
		for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) {
			unsigned char str[] = { 0xe0 , chr2 , chr3 , 0x00 };
			utf8tocp932(str);
		}
	}

	// Remaining 3-byte sequences.
	for (unsigned char chr1 = 0xe1; 0xef >= chr1; chr1++) {
		for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) {
			for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) {
				unsigned char str[] = { chr1 , chr2, chr3 , 0x00 };
				utf8tocp932(str);
			}
		}
	}

	// 4-byte sequences with lead 0xF0: second byte starts at 0x90 to exclude
	// overlong encodings.
	for (unsigned char chr2 = 0x90; 0xbf >= chr2; chr2++) {
		for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) {
			for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) {
				unsigned char str[] = { 0xf0 , chr2, chr3, chr4 , 0x00 };
				utf8tocp932(str);
			}
		}
	}

	// 4-byte sequences with lead 0xF1..0xF3.
	for (unsigned char chr1 = 0xf1; 0xf3 >= chr1; chr1++) {
		for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) {
			for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) {
				for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) {
					unsigned char str[] = { chr1 , chr2, chr3, chr4 , 0x00 };
					utf8tocp932(str);
				}
			}
		}
	}

	// 4-byte sequences with lead 0xF4: second byte ends at 0x8F so the code
	// point does not exceed U+10FFFF.
	for (unsigned char chr2 = 0x80; 0x8f >= chr2; chr2++) {
		for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) {
			for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) {
				unsigned char str[] = { 0xf4 , chr2, chr3, chr4 , 0x00 };
				utf8tocp932(str);
			}
		}
	}

	return 0;
}

参照

UTF8なstring入れたらShiftJISなstring出てくる関数作った
UTF-8