first_user
11 months ago
commit
da88f58351
8 changed files with 1054 additions and 0 deletions
-
63.gitattributes
-
363.gitignore
-
31csISO2022JP_test.sln
-
4csISO2022JP_test/cat.bat
-
306csISO2022JP_test/csISO2022JP_test.cpp
-
194csISO2022JP_test/csISO2022JP_test.vcxproj
-
8csISO2022JP_test/head.html.txt
-
85csISO2022JP_test/tail.html.txt
@ -0,0 +1,63 @@ |
|||
############################################################################### |
|||
# Set default behavior to automatically normalize line endings. |
|||
############################################################################### |
|||
* text=auto |
|||
|
|||
############################################################################### |
|||
# Set default behavior for command prompt diff. |
|||
# |
|||
# This is needed for earlier builds of msysgit that do not have it on by |
|||
# default for csharp files. |
|||
# Note: This is only used by command line |
|||
############################################################################### |
|||
#*.cs diff=csharp |
|||
|
|||
############################################################################### |
|||
# Set the merge driver for project and solution files |
|||
# |
|||
# Merging from the command prompt will add diff markers to the files if there |
|||
# are conflicts (Merging from VS is not affected by the settings below, in VS |
|||
# the diff markers are never inserted). Diff markers may cause the following |
|||
# file extensions to fail to load in VS. An alternative would be to treat |
|||
# these files as binary and thus will always conflict and require user |
|||
# intervention with every merge. To do so, just uncomment the entries below |
|||
############################################################################### |
|||
#*.sln merge=binary |
|||
#*.csproj merge=binary |
|||
#*.vbproj merge=binary |
|||
#*.vcxproj merge=binary |
|||
#*.vcproj merge=binary |
|||
#*.dbproj merge=binary |
|||
#*.fsproj merge=binary |
|||
#*.lsproj merge=binary |
|||
#*.wixproj merge=binary |
|||
#*.modelproj merge=binary |
|||
#*.sqlproj merge=binary |
|||
#*.wwaproj merge=binary |
|||
|
|||
############################################################################### |
|||
# behavior for image files |
|||
# |
|||
# image files are treated as binary by default. |
|||
############################################################################### |
|||
#*.jpg binary |
|||
#*.png binary |
|||
#*.gif binary |
|||
|
|||
############################################################################### |
|||
# diff behavior for common document formats |
|||
# |
|||
# Convert binary document formats to text before diffing them. This feature |
|||
# is only available from the command line. Turn it on by uncommenting the |
|||
# entries below. |
|||
############################################################################### |
|||
#*.doc diff=astextplain |
|||
#*.DOC diff=astextplain |
|||
#*.docx diff=astextplain |
|||
#*.DOCX diff=astextplain |
|||
#*.dot diff=astextplain |
|||
#*.DOT diff=astextplain |
|||
#*.pdf diff=astextplain |
|||
#*.PDF diff=astextplain |
|||
#*.rtf diff=astextplain |
|||
#*.RTF diff=astextplain |
@ -0,0 +1,363 @@ |
|||
## Ignore Visual Studio temporary files, build results, and |
|||
## files generated by popular Visual Studio add-ons. |
|||
## |
|||
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore |
|||
|
|||
# User-specific files |
|||
*.rsuser |
|||
*.suo |
|||
*.user |
|||
*.userosscache |
|||
*.sln.docstates |
|||
|
|||
# User-specific files (MonoDevelop/Xamarin Studio) |
|||
*.userprefs |
|||
|
|||
# Mono auto generated files |
|||
mono_crash.* |
|||
|
|||
# Build results |
|||
[Dd]ebug/ |
|||
[Dd]ebugPublic/ |
|||
[Rr]elease/ |
|||
[Rr]eleases/ |
|||
x64/ |
|||
x86/ |
|||
[Ww][Ii][Nn]32/ |
|||
[Aa][Rr][Mm]/ |
|||
[Aa][Rr][Mm]64/ |
|||
bld/ |
|||
[Bb]in/ |
|||
[Oo]bj/ |
|||
[Oo]ut/ |
|||
[Ll]og/ |
|||
[Ll]ogs/ |
|||
|
|||
# Visual Studio 2015/2017 cache/options directory |
|||
.vs/ |
|||
# Uncomment if you have tasks that create the project's static files in wwwroot |
|||
#wwwroot/ |
|||
|
|||
# Visual Studio 2017 auto generated files |
|||
Generated\ Files/ |
|||
|
|||
# MSTest test Results |
|||
[Tt]est[Rr]esult*/ |
|||
[Bb]uild[Ll]og.* |
|||
|
|||
# NUnit |
|||
*.VisualState.xml |
|||
TestResult.xml |
|||
nunit-*.xml |
|||
|
|||
# Build Results of an ATL Project |
|||
[Dd]ebugPS/ |
|||
[Rr]eleasePS/ |
|||
dlldata.c |
|||
|
|||
# Benchmark Results |
|||
BenchmarkDotNet.Artifacts/ |
|||
|
|||
# .NET Core |
|||
project.lock.json |
|||
project.fragment.lock.json |
|||
artifacts/ |
|||
|
|||
# ASP.NET Scaffolding |
|||
ScaffoldingReadMe.txt |
|||
|
|||
# StyleCop |
|||
StyleCopReport.xml |
|||
|
|||
# Files built by Visual Studio |
|||
*_i.c |
|||
*_p.c |
|||
*_h.h |
|||
*.ilk |
|||
*.meta |
|||
*.obj |
|||
*.iobj |
|||
*.pch |
|||
*.pdb |
|||
*.ipdb |
|||
*.pgc |
|||
*.pgd |
|||
*.rsp |
|||
*.sbr |
|||
*.tlb |
|||
*.tli |
|||
*.tlh |
|||
*.tmp |
|||
*.tmp_proj |
|||
*_wpftmp.csproj |
|||
*.log |
|||
*.vspscc |
|||
*.vssscc |
|||
.builds |
|||
*.pidb |
|||
*.svclog |
|||
*.scc |
|||
|
|||
# Chutzpah Test files |
|||
_Chutzpah* |
|||
|
|||
# Visual C++ cache files |
|||
ipch/ |
|||
*.aps |
|||
*.ncb |
|||
*.opendb |
|||
*.opensdf |
|||
*.sdf |
|||
*.cachefile |
|||
*.VC.db |
|||
*.VC.VC.opendb |
|||
|
|||
# Visual Studio profiler |
|||
*.psess |
|||
*.vsp |
|||
*.vspx |
|||
*.sap |
|||
|
|||
# Visual Studio Trace Files |
|||
*.e2e |
|||
|
|||
# TFS 2012 Local Workspace |
|||
$tf/ |
|||
|
|||
# Guidance Automation Toolkit |
|||
*.gpState |
|||
|
|||
# ReSharper is a .NET coding add-in |
|||
_ReSharper*/ |
|||
*.[Rr]e[Ss]harper |
|||
*.DotSettings.user |
|||
|
|||
# TeamCity is a build add-in |
|||
_TeamCity* |
|||
|
|||
# DotCover is a Code Coverage Tool |
|||
*.dotCover |
|||
|
|||
# AxoCover is a Code Coverage Tool |
|||
.axoCover/* |
|||
!.axoCover/settings.json |
|||
|
|||
# Coverlet is a free, cross platform Code Coverage Tool |
|||
coverage*.json |
|||
coverage*.xml |
|||
coverage*.info |
|||
|
|||
# Visual Studio code coverage results |
|||
*.coverage |
|||
*.coveragexml |
|||
|
|||
# NCrunch |
|||
_NCrunch_* |
|||
.*crunch*.local.xml |
|||
nCrunchTemp_* |
|||
|
|||
# MightyMoose |
|||
*.mm.* |
|||
AutoTest.Net/ |
|||
|
|||
# Web workbench (sass) |
|||
.sass-cache/ |
|||
|
|||
# Installshield output folder |
|||
[Ee]xpress/ |
|||
|
|||
# DocProject is a documentation generator add-in |
|||
DocProject/buildhelp/ |
|||
DocProject/Help/*.HxT |
|||
DocProject/Help/*.HxC |
|||
DocProject/Help/*.hhc |
|||
DocProject/Help/*.hhk |
|||
DocProject/Help/*.hhp |
|||
DocProject/Help/Html2 |
|||
DocProject/Help/html |
|||
|
|||
# Click-Once directory |
|||
publish/ |
|||
|
|||
# Publish Web Output |
|||
*.[Pp]ublish.xml |
|||
*.azurePubxml |
|||
# Note: Comment the next line if you want to checkin your web deploy settings, |
|||
# but database connection strings (with potential passwords) will be unencrypted |
|||
*.pubxml |
|||
*.publishproj |
|||
|
|||
# Microsoft Azure Web App publish settings. Comment the next line if you want to |
|||
# checkin your Azure Web App publish settings, but sensitive information contained |
|||
# in these scripts will be unencrypted |
|||
PublishScripts/ |
|||
|
|||
# NuGet Packages |
|||
*.nupkg |
|||
# NuGet Symbol Packages |
|||
*.snupkg |
|||
# The packages folder can be ignored because of Package Restore |
|||
**/[Pp]ackages/* |
|||
# except build/, which is used as an MSBuild target. |
|||
!**/[Pp]ackages/build/ |
|||
# Uncomment if necessary however generally it will be regenerated when needed |
|||
#!**/[Pp]ackages/repositories.config |
|||
# NuGet v3's project.json files produce more ignorable files |
|||
*.nuget.props |
|||
*.nuget.targets |
|||
|
|||
# Microsoft Azure Build Output |
|||
csx/ |
|||
*.build.csdef |
|||
|
|||
# Microsoft Azure Emulator |
|||
ecf/ |
|||
rcf/ |
|||
|
|||
# Windows Store app package directories and files |
|||
AppPackages/ |
|||
BundleArtifacts/ |
|||
Package.StoreAssociation.xml |
|||
_pkginfo.txt |
|||
*.appx |
|||
*.appxbundle |
|||
*.appxupload |
|||
|
|||
# Visual Studio cache files |
|||
# files ending in .cache can be ignored |
|||
*.[Cc]ache |
|||
# but keep track of directories ending in .cache |
|||
!?*.[Cc]ache/ |
|||
|
|||
# Others |
|||
ClientBin/ |
|||
~$* |
|||
*~ |
|||
*.dbmdl |
|||
*.dbproj.schemaview |
|||
*.jfm |
|||
*.pfx |
|||
*.publishsettings |
|||
orleans.codegen.cs |
|||
|
|||
# Including strong name files can present a security risk |
|||
# (https://github.com/github/gitignore/pull/2483#issue-259490424) |
|||
#*.snk |
|||
|
|||
# Since there are multiple workflows, uncomment next line to ignore bower_components |
|||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) |
|||
#bower_components/ |
|||
|
|||
# RIA/Silverlight projects |
|||
Generated_Code/ |
|||
|
|||
# Backup & report files from converting an old project file |
|||
# to a newer Visual Studio version. Backup files are not needed, |
|||
# because we have git ;-) |
|||
_UpgradeReport_Files/ |
|||
Backup*/ |
|||
UpgradeLog*.XML |
|||
UpgradeLog*.htm |
|||
ServiceFabricBackup/ |
|||
*.rptproj.bak |
|||
|
|||
# SQL Server files |
|||
*.mdf |
|||
*.ldf |
|||
*.ndf |
|||
|
|||
# Business Intelligence projects |
|||
*.rdl.data |
|||
*.bim.layout |
|||
*.bim_*.settings |
|||
*.rptproj.rsuser |
|||
*- [Bb]ackup.rdl |
|||
*- [Bb]ackup ([0-9]).rdl |
|||
*- [Bb]ackup ([0-9][0-9]).rdl |
|||
|
|||
# Microsoft Fakes |
|||
FakesAssemblies/ |
|||
|
|||
# GhostDoc plugin setting file |
|||
*.GhostDoc.xml |
|||
|
|||
# Node.js Tools for Visual Studio |
|||
.ntvs_analysis.dat |
|||
node_modules/ |
|||
|
|||
# Visual Studio 6 build log |
|||
*.plg |
|||
|
|||
# Visual Studio 6 workspace options file |
|||
*.opt |
|||
|
|||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) |
|||
*.vbw |
|||
|
|||
# Visual Studio LightSwitch build output |
|||
**/*.HTMLClient/GeneratedArtifacts |
|||
**/*.DesktopClient/GeneratedArtifacts |
|||
**/*.DesktopClient/ModelManifest.xml |
|||
**/*.Server/GeneratedArtifacts |
|||
**/*.Server/ModelManifest.xml |
|||
_Pvt_Extensions |
|||
|
|||
# Paket dependency manager |
|||
.paket/paket.exe |
|||
paket-files/ |
|||
|
|||
# FAKE - F# Make |
|||
.fake/ |
|||
|
|||
# CodeRush personal settings |
|||
.cr/personal |
|||
|
|||
# Python Tools for Visual Studio (PTVS) |
|||
__pycache__/ |
|||
*.pyc |
|||
|
|||
# Cake - Uncomment if you are using it |
|||
# tools/** |
|||
# !tools/packages.config |
|||
|
|||
# Tabs Studio |
|||
*.tss |
|||
|
|||
# Telerik's JustMock configuration file |
|||
*.jmconfig |
|||
|
|||
# BizTalk build output |
|||
*.btp.cs |
|||
*.btm.cs |
|||
*.odx.cs |
|||
*.xsd.cs |
|||
|
|||
# OpenCover UI analysis results |
|||
OpenCover/ |
|||
|
|||
# Azure Stream Analytics local run output |
|||
ASALocalRun/ |
|||
|
|||
# MSBuild Binary and Structured Log |
|||
*.binlog |
|||
|
|||
# NVidia Nsight GPU debugger configuration file |
|||
*.nvuser |
|||
|
|||
# MFractors (Xamarin productivity tool) working folder |
|||
.mfractor/ |
|||
|
|||
# Local History for Visual Studio |
|||
.localhistory/ |
|||
|
|||
# BeatPulse healthcheck temp database |
|||
healthchecksdb |
|||
|
|||
# Backup folder for Package Reference Convert tool in Visual Studio 2017 |
|||
MigrationBackup/ |
|||
|
|||
# Ionide (cross platform F# VS Code tools) working folder |
|||
.ionide/ |
|||
|
|||
# Fody - auto-generated XML schema |
|||
FodyWeavers.xsd |
@ -0,0 +1,31 @@ |
|||
|
|||
Microsoft Visual Studio Solution File, Format Version 12.00 |
|||
# Visual Studio Version 16 |
|||
VisualStudioVersion = 16.0.34407.143 |
|||
MinimumVisualStudioVersion = 10.0.40219.1 |
|||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csISO2022JP_test", "csISO2022JP_test\csISO2022JP_test.vcxproj", "{DC461BBE-2550-4427-A48F-BD6AA7D64E57}" |
|||
EndProject |
|||
Global |
|||
GlobalSection(SolutionConfigurationPlatforms) = preSolution |
|||
Debug|x64 = Debug|x64 |
|||
Debug|x86 = Debug|x86 |
|||
Release|x64 = Release|x64 |
|||
Release|x86 = Release|x86 |
|||
EndGlobalSection |
|||
GlobalSection(ProjectConfigurationPlatforms) = postSolution |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Debug|x64.ActiveCfg = Debug|x64 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Debug|x64.Build.0 = Debug|x64 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Debug|x86.ActiveCfg = Debug|Win32 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Debug|x86.Build.0 = Debug|Win32 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Release|x64.ActiveCfg = Release|x64 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Release|x64.Build.0 = Release|x64 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Release|x86.ActiveCfg = Release|Win32 |
|||
{DC461BBE-2550-4427-A48F-BD6AA7D64E57}.Release|x86.Build.0 = Release|Win32 |
|||
EndGlobalSection |
|||
GlobalSection(SolutionProperties) = preSolution |
|||
HideSolutionNode = FALSE |
|||
EndGlobalSection |
|||
GlobalSection(ExtensibilityGlobals) = postSolution |
|||
SolutionGuid = {20106F08-7A5C-4496-A999-8A62D53364E5} |
|||
EndGlobalSection |
|||
EndGlobal |
@ -0,0 +1,4 @@ |
|||
copy head.html.txt csISO2022JP.html |
|||
csISO2022JP_test.exe >>csISO2022JP.html |
|||
type tail.html.txt >>csISO2022JP.html |
|||
csISO2022JP.html |
@ -0,0 +1,306 @@ |
|||
#include <iostream> |
|||
#include <string>
|
|||
#include <map>
|
|||
#include <vector>
|
|||
#include <iomanip>
|
|||
#include <windows.h>
|
|||
// https://github.com/aantron/better-enums
|
|||
#include "enum.h"
|
|||
|
|||
/// <summary>
|
|||
/// 制御文字を含まず表示可能な文字の全可能性を入力してシフトJIS(コードページ932)からワイド文字の変換表を作成して返す
|
|||
/// </summary>
|
|||
/// <returns>シフトJIS1文字の文字列をキー、ワイド文字1文字の文字列を値としたstd::map</returns>
|
|||
auto create_sjis2ws() { |
|||
std::map<std::string, std::wstring> sjis2ws; |
|||
char ch[3]; |
|||
ch[1] = 0; |
|||
ch[2] = 0; |
|||
for (ch[0] = 0x20; ch[0] != 0; ++ch[0]) { |
|||
wchar_t wch[3]; |
|||
int len = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, ch, 1, wch, sizeof(wch) / sizeof(wch[0])); |
|||
if (len == 1) { |
|||
sjis2ws[std::string(ch)] = std::wstring(1, wch[0]); |
|||
} |
|||
else { |
|||
for (ch[1] = 0x20; ch[1] != 0; ++ch[1]) { |
|||
int len = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, ch, 2, wch, sizeof(wch) / sizeof(wch[0])); |
|||
if (len == 1) { |
|||
sjis2ws[std::string(ch)] = std::wstring(1, wch[0]); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return sjis2ws; |
|||
} |
|||
|
|||
/// <summary>
/// Inverts a Shift_JIS-to-wide-character table: groups every Shift_JIS string under the
/// wide-character string it maps to.
/// </summary>
/// <typeparam name="K">Shift_JIS string type</typeparam>
/// <typeparam name="V">Wide-character string type</typeparam>
/// <param name="sjis2ws">Conversion table from Shift_JIS strings to wide-character strings</param>
/// <returns>std::map keyed by wide-character string; the value is the vector of Shift_JIS strings that map to it, in the iteration order of the input map</returns>
template<typename K, typename V>
auto create_ws2sjis(const std::map<K,V>& sjis2ws) {
    std::map<std::wstring, std::vector<std::string>> grouped;
    for (const auto& entry : sjis2ws) {
        // operator[] creates an empty vector the first time a wide character is seen.
        grouped[entry.second].push_back(entry.first);
    }
    return grouped;
}
|||
|
|||
/// <summary>
|
|||
/// シーケンスの列挙定義
|
|||
/// </summary>
|
|||
BETTER_ENUM(EscapeType, uint8_t, |
|||
ASCII, |
|||
JIS_X_201_1976, |
|||
JIS_X_0208_1978, |
|||
JIS_X_0208_1990, |
|||
JIS_X_0212_1990, |
|||
GB_2312_80, |
|||
KS_X_1001_1992, |
|||
ISO_IEC_8859_1_HIGH, |
|||
ISO_IEC_8859_7_HIGH, |
|||
JIS_X_0213_2000_1, |
|||
JIS_X_0213_2000_2, |
|||
JIS_X_0213_2004_1, |
|||
JIS_X_0201_KANA |
|||
); |
|||
|
|||
/// <summary>
|
|||
/// ISO-2022-JP-2のエスケープシーケンス
|
|||
/// </summary>
|
|||
const struct sequence_type { |
|||
/// <summary>
|
|||
/// エスケープシーケンス文字列
|
|||
/// </summary>
|
|||
std::string escapes; |
|||
/// <summary>
|
|||
/// 文字セットなどの名前
|
|||
/// </summary>
|
|||
//std::string name;
|
|||
EscapeType type; |
|||
/// <summary>
|
|||
/// 1文字に必要な符号の長さ[byte]
|
|||
/// </summary>
|
|||
int char_length; |
|||
} seq_type[] = { |
|||
{"\x1b(B", EscapeType::ASCII, 1, }, |
|||
{"\x1b(J", EscapeType::JIS_X_201_1976, 1}, |
|||
{"\x1b$@", EscapeType::JIS_X_0208_1978, 2}, |
|||
{"\x1b$B", EscapeType::JIS_X_0208_1990, 2}, |
|||
{"\x1b$(D", EscapeType::JIS_X_0212_1990, 2}, |
|||
{"\x1b$A", EscapeType::GB_2312_80, 2}, |
|||
{"\x1b$(C", EscapeType::KS_X_1001_1992, 2}, |
|||
{"\x1b.A", EscapeType::ISO_IEC_8859_1_HIGH, 1}, |
|||
{"\x1b.F", EscapeType::ISO_IEC_8859_7_HIGH, 1}, |
|||
{"\x1b$(O", EscapeType::JIS_X_0213_2000_1, 2}, |
|||
{"\x1b$(P", EscapeType::JIS_X_0213_2000_2, 2}, |
|||
{"\x1b$(Q", EscapeType::JIS_X_0213_2004_1, 2}, |
|||
{"\x1b(I", EscapeType::JIS_X_0201_KANA, 1}, |
|||
//{"\x1b$(?", "JIS X 0208-1990(gaiji)", 2},
|
|||
}; |
|||
|
|||
/// <summary>
|
|||
/// JIS文字列の先頭のエスケープシーケンスを読み取り、種別を判断して返す
|
|||
/// </summary>
|
|||
/// <typeparam name="T">文字列型</typeparam>
|
|||
/// <param name="s">JIS文字列</param>
|
|||
/// <returns>エスケープシーケンスタイプを表すポインタ</returns>
|
|||
template<typename T> |
|||
auto find_and_cut_sequence(T& s) { |
|||
const sequence_type* pst = &seq_type[0]; |
|||
for (const auto& st : seq_type) { |
|||
const auto& esc = st.escapes; |
|||
if (s.substr(0, esc.length()) == esc) { |
|||
pst = &st; |
|||
s = s.substr(esc.length(), st.char_length); |
|||
break; |
|||
} |
|||
} |
|||
if (pst == &seq_type[0] && s[0] == '\x1b') { |
|||
throw std::exception("unknown escape sequence"); |
|||
} |
|||
return pst; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// ISO-2022-JP-2からワイド文字列への変換表を作成して返す
|
|||
/// </summary>
|
|||
/// <typeparam name="T">std::pairをiterate可能なコンテナ型</typeparam>
|
|||
/// <param name="ws2sjis">std::pairのfirstにワイド文字列が入っているコンテナ</param>
|
|||
/// <returns>ISO-2022-JP-2(文字セット名, 文字列のstd::pair)からワイド文字列への変換表</returns>
|
|||
template<typename T> |
|||
auto create_jis2ws(const T& ws2sjis) { |
|||
std::map<std::pair<EscapeType, std::string>, std::wstring> jis2ws; |
|||
for (const auto& ws2sjis_pair : ws2sjis) { |
|||
const auto& ws = ws2sjis_pair.first; |
|||
char jis[100]; |
|||
auto len = WideCharToMultiByte(50221, 0, ws.c_str(), static_cast<int>(ws.length()), jis, sizeof(jis) / sizeof(jis[0]), NULL, NULL); |
|||
if (len <= 0 || (jis[0] == '?' && ws[0] != L'?')) continue; |
|||
std::string jisstr(jis, len); |
|||
auto pst = find_and_cut_sequence(jisstr); |
|||
auto key = std::make_pair(pst->type, jisstr); |
|||
if (jis2ws.find(key) != jis2ws.end()) { |
|||
throw std::exception("duplicated jis code"); |
|||
} |
|||
jis2ws[key] = ws; |
|||
} |
|||
return jis2ws; |
|||
} |
|||
|
|||
/// <summary>
/// Returns only the JIS X 0208 row (ku) derived from the first and second byte of a
/// Shift_JIS code. The arithmetic is done in unsigned int and the result is the low
/// 8 bits of that value.
/// </summary>
/// <param name="first">First byte</param>
/// <param name="second">Second byte</param>
/// <returns>Row (ku)</returns>
std::uint8_t get_jis208_ku(std::uint8_t first, std::uint8_t second) {
    unsigned int ku = static_cast<unsigned int>(first) << 1;
    // First bytes above 0x9f are shifted down by 0x80.
    if (first > 0x9fU) {
        ku -= 0x80U;
    }
    // Each first byte covers two rows; second bytes below 0x9f select the lower one.
    if (second < 0x9fU) {
        ku -= 1U;
    }
    return static_cast<std::uint8_t>(ku);
}
|||
|
|||
/// <summary>
/// Takes the string for one Shift_JIS character and returns the label of the character
/// set category it originates from. A first byte below 0x80 or within 0xa1-0xdf is
/// treated as a single-byte character; anything else is classified by the row (ku)
/// computed from its first two bytes.
/// </summary>
/// <typeparam name="T">String type</typeparam>
/// <param name="s">String holding one Shift_JIS character</param>
/// <returns>String naming the category</returns>
/// <exception cref="std::exception">Thrown when the string is empty, or when a two-byte character has no second byte</exception>
template<typename T>
auto get_sjis_type(const T& s) {
    if (s.length() == 0) throw std::exception("sjis empty!");

    const auto lead = static_cast<std::uint8_t>(s[0]);
    const bool single_byte = lead < 0x80U || (0xa1U <= lead && lead <= 0xdfU);
    if (single_byte) {
        return std::string("JIS X 0201:1997 (ラテン文字・片仮名)");
    }

    if (s.length() <= 1) throw std::exception("sjis 2nd byte not found");

    const auto ku = get_jis208_ku(s[0], s[1]);
    // Row 13: NEC special characters.
    if (ku == 13) return std::string("NEC特殊文字");
    // Rows 89-92: NEC-selected IBM extension characters.
    if (89 <= ku && ku <= 92) return std::string("NEC選定IBM拡張文字");
    // Rows 95-114: user-defined characters.
    if (95 <= ku && ku <= 114) return std::string("ユーザー定義外字");
    // Rows 115-119: IBM extension characters.
    if (115 <= ku && ku <= 119) return std::string("IBM拡張文字");
    return std::string("JIS X 0208:1997");
}
|||
|
|||
/// <summary>
|
|||
/// JISコード(タイプ&コード)から区を抽出し、サブタイプを判断して返す
|
|||
/// </summary>
|
|||
/// <typeparam name="F">シーケンスタイプ型</typeparam>
|
|||
/// <typeparam name="S">JIS文字列型</typeparam>
|
|||
/// <param name="jis">JIS文字列</param>
|
|||
/// <returns>サブタイプを表す文字列</returns>
|
|||
template<typename F, typename S> |
|||
auto get_jis_subtype(const std::pair<F,S>& jis) { |
|||
const auto ku = jis.second[0] - 0x20; |
|||
if (jis.first == +EscapeType::JIS_X_0208_1990) { |
|||
if (1 << ku && ku <= 8) { |
|||
return std::string("記号、英数字、かな"); |
|||
} |
|||
else if (16 <= ku && ku <= 47) { |
|||
return std::string("第1水準漢字"); |
|||
} |
|||
else if (48 <= ku && ku <= 84){ |
|||
return std::string("第2水準漢字"); |
|||
} |
|||
return std::string("未定義"); |
|||
} |
|||
else if (jis.first == +EscapeType::ASCII) { |
|||
return std::string(""); |
|||
} |
|||
else if (jis.first == +EscapeType::JIS_X_0201_KANA) { |
|||
return std::string(""); |
|||
} |
|||
else if (jis.first == +EscapeType::JIS_X_0212_1990) { |
|||
if (2 <= ku and ku <= 11) { |
|||
return std::string("非漢字"); |
|||
} |
|||
if (12 <= ku and ku <= 77) { |
|||
return std::string("漢字"); |
|||
} |
|||
return std::string("未定義"); |
|||
} |
|||
else { |
|||
throw std::exception("unkown type"); |
|||
} |
|||
} |
|||
|
|||
/// <summary>
/// Writes to standard output a JSON array listing Unicode and Shift_JIS codes in
/// ISO-2022-JP-2 order (the iteration order of jis2ws). Each element has the form
/// {"unicode":N,"jis":{"type":"...","code":N,"subtype":"..."},"ms932":[{"code":N,"type":"..."},...]}
/// where every code is the character's bytes packed big-endian into one integer.
/// </summary>
/// <typeparam name="JIS">ISO-2022-JP-2 key type</typeparam>
/// <typeparam name="WS">Wide-character string type</typeparam>
/// <typeparam name="SJIS">Shift_JIS container type</typeparam>
/// <param name="jis2ws">Conversion table from ISO-2022-JP-2 (escape type, code bytes) pairs to wide-character strings</param>
/// <param name="ws2sjis">std::map keyed by wide character whose value is the vector of Shift_JIS strings; looked up with at(), so a missing key throws</param>
template<typename JIS, typename WS, typename SJIS>
void print_json(const std::map<JIS, WS>& jis2ws, const std::map<WS, SJIS>& ws2sjis) {
    // Packs the bytes of one character, most significant byte first, into a single integer.
    const auto pack_bytes = [](const auto& bytes) {
        unsigned int packed = 0;
        for (auto b : bytes) packed = (packed << (sizeof(b) * 8)) | static_cast<std::uint8_t>(b);
        return packed;
    };

    std::cout << "[";
    const char* entry_separator = "";
    for (const auto& entry : jis2ws) {
        const auto& jis = entry.first;
        const auto& ws = entry.second;

        // Empty before the first element, "," before every later one.
        std::cout << entry_separator;
        entry_separator = ",";

        std::cout << "{\"unicode\":" << static_cast<int>(ws[0]) << ","
            << "\"jis\":{\"type\":\"" << jis.first << "\","
            << "\"code\":";
        const unsigned int jis_code = pack_bytes(jis.second);
        std::cout
            << jis_code
            << ",\"subtype\":\"" << get_jis_subtype(jis)
            << "\"},"
            << "\"ms932\":[";

        const char* sjis_separator = "";
        for (const auto& s : ws2sjis.at(ws)) {
            std::cout << sjis_separator;
            sjis_separator = ",";
            std::cout << "{\"code\":";
            std::cout << pack_bytes(s);
            std::cout << ",\"type\":\"" << get_sjis_type(s) << "\"}";
        }
        std::cout << "]}" << std::endl;
    }
    std::cout << "]" << std::endl;
}
|||
|
|||
/// <summary>
|
|||
/// メイン関数
|
|||
/// </summary>
|
|||
/// <param name="argc">コマンド+引数の数</param>
|
|||
/// <param name="argv">コマンド,引数1,引数2,...の文字列</param>
|
|||
/// <returns>プロセス戻り値</returns>
|
|||
int main(int argc, char* argv[]) |
|||
{ |
|||
setlocale(LC_CTYPE, ".UTF-8"); |
|||
auto sjis2ws = create_sjis2ws(); |
|||
auto ws2sjis = create_ws2sjis(sjis2ws); |
|||
auto jis2ws = create_jis2ws(ws2sjis); |
|||
print_json(jis2ws, ws2sjis); |
|||
return 0; |
|||
} |
@ -0,0 +1,194 @@ |
|||
<?xml version="1.0" encoding="utf-8"?> |
|||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
|||
<ItemGroup Label="ProjectConfigurations"> |
|||
<ProjectConfiguration Include="Debug|Win32"> |
|||
<Configuration>Debug</Configuration> |
|||
<Platform>Win32</Platform> |
|||
</ProjectConfiguration> |
|||
<ProjectConfiguration Include="Release|Win32"> |
|||
<Configuration>Release</Configuration> |
|||
<Platform>Win32</Platform> |
|||
</ProjectConfiguration> |
|||
<ProjectConfiguration Include="Debug|x64"> |
|||
<Configuration>Debug</Configuration> |
|||
<Platform>x64</Platform> |
|||
</ProjectConfiguration> |
|||
<ProjectConfiguration Include="Release|x64"> |
|||
<Configuration>Release</Configuration> |
|||
<Platform>x64</Platform> |
|||
</ProjectConfiguration> |
|||
</ItemGroup> |
|||
<PropertyGroup Label="Globals"> |
|||
<VCProjectVersion>16.0</VCProjectVersion> |
|||
<Keyword>Win32Proj</Keyword> |
|||
<ProjectGuid>{dc461bbe-2550-4427-a48f-bd6aa7d64e57}</ProjectGuid> |
|||
<RootNamespace>csISO2022JPtest</RootNamespace> |
|||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
|||
</PropertyGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>true</UseDebugLibraries> |
|||
<PlatformToolset>v142</PlatformToolset> |
|||
<CharacterSet>Unicode</CharacterSet> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>false</UseDebugLibraries> |
|||
<PlatformToolset>v142</PlatformToolset> |
|||
<WholeProgramOptimization>true</WholeProgramOptimization> |
|||
<CharacterSet>Unicode</CharacterSet> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>true</UseDebugLibraries> |
|||
<PlatformToolset>v142</PlatformToolset> |
|||
<CharacterSet>Unicode</CharacterSet> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>false</UseDebugLibraries> |
|||
<PlatformToolset>v142</PlatformToolset> |
|||
<WholeProgramOptimization>true</WholeProgramOptimization> |
|||
<CharacterSet>Unicode</CharacterSet> |
|||
</PropertyGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
|||
<ImportGroup Label="ExtensionSettings"> |
|||
</ImportGroup> |
|||
<ImportGroup Label="Shared"> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<PropertyGroup Label="UserMacros" /> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<LinkIncremental>true</LinkIncremental> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
|||
<LinkIncremental>false</LinkIncremental> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
|||
<LinkIncremental>true</LinkIncremental> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
|||
<LinkIncremental>false</LinkIncremental> |
|||
</PropertyGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<SDLCheck>true</SDLCheck> |
|||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
|||
<ConformanceMode>true</ConformanceMode> |
|||
</ClCompile> |
|||
<Link> |
|||
<SubSystem>Console</SubSystem> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
</Link> |
|||
<PreBuildEvent> |
|||
<Command>cd $(ProjectDir) |
|||
if not exist enum.h ( |
|||
powershell -Command wget https://raw.githubusercontent.com/aantron/better-enums/master/enum.h -OutFile enum.h |
|||
)</Command> |
|||
</PreBuildEvent> |
|||
<PostBuildEvent> |
|||
<Command>copy $(ProjectDir)*.bat $(TargetDir) |
|||
copy $(ProjectDir)*.html.txt $(TargetDir)</Command> |
|||
</PostBuildEvent> |
|||
</ItemDefinitionGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<FunctionLevelLinking>true</FunctionLevelLinking> |
|||
<IntrinsicFunctions>true</IntrinsicFunctions> |
|||
<SDLCheck>true</SDLCheck> |
|||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
|||
<ConformanceMode>true</ConformanceMode> |
|||
</ClCompile> |
|||
<Link> |
|||
<SubSystem>Console</SubSystem> |
|||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
|||
<OptimizeReferences>true</OptimizeReferences> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
</Link> |
|||
<PostBuildEvent> |
|||
<Command>copy $(ProjectDir)*.bat $(TargetDir) |
|||
copy $(ProjectDir)*.html.txt $(TargetDir)</Command> |
|||
</PostBuildEvent> |
|||
<PreBuildEvent> |
|||
<Command>cd $(ProjectDir) |
|||
if not exist enum.h ( |
|||
powershell -Command wget https://raw.githubusercontent.com/aantron/better-enums/master/enum.h -OutFile enum.h |
|||
)</Command> |
|||
</PreBuildEvent> |
|||
</ItemDefinitionGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<SDLCheck>true</SDLCheck> |
|||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
|||
<ConformanceMode>true</ConformanceMode> |
|||
</ClCompile> |
|||
<Link> |
|||
<SubSystem>Console</SubSystem> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
</Link> |
|||
<PostBuildEvent> |
|||
<Command>copy $(ProjectDir)*.bat $(TargetDir) |
|||
copy $(ProjectDir)*.html.txt $(TargetDir)</Command> |
|||
</PostBuildEvent> |
|||
<PreBuildEvent> |
|||
<Command>cd $(ProjectDir) |
|||
if not exist enum.h ( |
|||
powershell -Command wget https://raw.githubusercontent.com/aantron/better-enums/master/enum.h -OutFile enum.h |
|||
)</Command> |
|||
</PreBuildEvent> |
|||
</ItemDefinitionGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<FunctionLevelLinking>true</FunctionLevelLinking> |
|||
<IntrinsicFunctions>true</IntrinsicFunctions> |
|||
<SDLCheck>true</SDLCheck> |
|||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
|||
<ConformanceMode>true</ConformanceMode> |
|||
</ClCompile> |
|||
<Link> |
|||
<SubSystem>Console</SubSystem> |
|||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
|||
<OptimizeReferences>true</OptimizeReferences> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
</Link> |
|||
<PostBuildEvent> |
|||
<Command>copy $(ProjectDir)*.bat $(TargetDir) |
|||
copy $(ProjectDir)*.html.txt $(TargetDir)</Command> |
|||
</PostBuildEvent> |
|||
<PreBuildEvent> |
|||
<Command>cd $(ProjectDir) |
|||
if not exist enum.h ( |
|||
powershell -Command wget https://raw.githubusercontent.com/aantron/better-enums/master/enum.h -OutFile enum.h |
|||
)</Command> |
|||
</PreBuildEvent> |
|||
</ItemDefinitionGroup> |
|||
<ItemGroup> |
|||
<ClCompile Include="csISO2022JP_test.cpp" /> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<None Include="cat.bat" /> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<Text Include="head.html.txt" /> |
|||
<Text Include="tail.html.txt" /> |
|||
</ItemGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
|||
<ImportGroup Label="ExtensionTargets"> |
|||
</ImportGroup> |
|||
</Project> |
@ -0,0 +1,8 @@ |
|||
<table class="kanji_code"> |
|||
<thead> |
|||
<tr><th>文字</th><th>Unicode</th><th>JIS文字集合</th><th>分類</th><th>JISコード</th><th>シフトJISコード(分類)</th></tr> |
|||
</thead> |
|||
</table> |
|||
<script> |
|||
(()=>{ |
|||
const iniitial_data = |
@ -0,0 +1,85 @@ |
|||
; |
|||
// Renders one table row per entry of the generated JSON array. Each entry is read as
//   { unicode, jis: { type, code, subtype }, ms932: [{ code, type }, ...] }
// At most this many Shift_JIS codes are listed per row; the rest are summarised.
const MAX_SJIS_SHOWN = 3;

// Formats a character code as 0x-prefixed hex, zero-padded to whole bytes
// (2 digits below 0x100, otherwise 4).
const toHex = (code) => {
  const digits = code < 0x100 ? 2 : 4;
  return `0x${code.toString(16).padStart(digits, '0')}`;
};

// Appends a <td> holding the given text to the row and returns the new cell.
const appendCell = (row, text) => {
  const cell = document.createElement('td');
  cell.textContent = text;
  row.appendChild(cell);
  return cell;
};

const table = document.querySelector('table');
const tbody = document.createElement('tbody');
iniitial_data.forEach((elm) => {
  const tr = document.createElement('tr');
  appendCell(tr, String.fromCodePoint(elm.unicode));
  appendCell(tr, `U+${elm.unicode.toString(16).padStart(4, '0')}`);
  appendCell(tr, `${elm.jis.type}`);
  appendCell(tr, `${elm.jis.subtype}`);
  appendCell(tr, toHex(elm.jis.code));

  const sjisCell = document.createElement('td');
  sjisCell.classList.add('kanji_code_left');
  const ul = document.createElement('ul');
  elm.ms932.slice(0, MAX_SJIS_SHOWN).forEach((sjis) => {
    const li = document.createElement('li');
    // The byte width is decided from sjis.code. The original compared the sjis object
    // itself with 0x100, which is always false, so single-byte codes were shown with
    // four hex digits.
    li.textContent = `${toHex(sjis.code)}(${sjis.type})`;
    ul.appendChild(li);
  });
  if (elm.ms932.length > MAX_SJIS_SHOWN) {
    const li = document.createElement('li');
    li.textContent = `...(*${elm.ms932.length - MAX_SJIS_SHOWN})`;
    ul.appendChild(li);
  }
  sjisCell.appendChild(ul);
  tr.appendChild(sjisCell);
  tbody.appendChild(tr);
});
table.appendChild(tbody);
|||
})(); |
|||
</script> |
|||
<style> |
|||
table.kanji_code { |
|||
border: 1px black solid; |
|||
border-collapse: collapse; |
|||
} |
|||
|
|||
table.kanji_code * { |
|||
font-family: monospace; |
|||
padding: 0; |
|||
margin: 0; |
|||
} |
|||
|
|||
table.kanji_code th { |
|||
border: 1px darkgray solid; |
|||
background: darkblue; |
|||
color: white; |
|||
} |
|||
|
|||
table.kanji_code td { |
|||
border: 1px darkgray solid; |
|||
text-align: center; |
|||
} |
|||
|
|||
table.kanji_code td.kanji_code_left { |
|||
text-align: left; |
|||
} |
|||
|
|||
table.kanji_code tr { |
|||
vertical-align: top; |
|||
} |
|||
|
|||
table.kanji_code ul { |
|||
padding-left: 0; |
|||
margin-bottom: 0; |
|||
} |
|||
|
|||
table.kanji_code li { |
|||
list-style: none; |
|||
margin: 0; |
|||
} |
|||
</style> |
Write
Preview
Loading…
Cancel
Save
Reference in new issue