00001
00002
00003
00004
00005
00006
00007
00008 #include <botan/charset.h>
00009 #include <botan/parsing.h>
00010 #include <botan/exceptn.h>
00011 #include <cctype>
00012
00013 namespace Botan {
00014
00015 namespace Charset {
00016
00017 namespace {
00018
00019
00020
00021
00022 std::string ucs2_to_latin1(const std::string& ucs2)
00023 {
00024 if(ucs2.size() % 2 == 1)
00025 throw Decoding_Error("UCS-2 string has an odd number of bytes");
00026
00027 std::string latin1;
00028
00029 for(u32bit j = 0; j != ucs2.size(); j += 2)
00030 {
00031 const byte c1 = ucs2[j];
00032 const byte c2 = ucs2[j+1];
00033
00034 if(c1 != 0)
00035 throw Decoding_Error("UCS-2 has non-Latin1 characters");
00036
00037 latin1 += static_cast<char>(c2);
00038 }
00039
00040 return latin1;
00041 }
00042
00043
00044
00045
00046 std::string utf8_to_latin1(const std::string& utf8)
00047 {
00048 std::string iso8859;
00049
00050 u32bit position = 0;
00051 while(position != utf8.size())
00052 {
00053 const byte c1 = static_cast<byte>(utf8[position++]);
00054
00055 if(c1 <= 0x7F)
00056 iso8859 += static_cast<char>(c1);
00057 else if(c1 >= 0xC0 && c1 <= 0xC7)
00058 {
00059 if(position == utf8.size())
00060 throw Decoding_Error("UTF-8: sequence truncated");
00061
00062 const byte c2 = static_cast<byte>(utf8[position++]);
00063 const byte iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F);
00064
00065 if(iso_char <= 0x7F)
00066 throw Decoding_Error("UTF-8: sequence longer than needed");
00067
00068 iso8859 += static_cast<char>(iso_char);
00069 }
00070 else
00071 throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used");
00072 }
00073
00074 return iso8859;
00075 }
00076
00077
00078
00079
00080 std::string latin1_to_utf8(const std::string& iso8859)
00081 {
00082 std::string utf8;
00083 for(u32bit j = 0; j != iso8859.size(); ++j)
00084 {
00085 const byte c = static_cast<byte>(iso8859[j]);
00086
00087 if(c <= 0x7F)
00088 utf8 += static_cast<char>(c);
00089 else
00090 {
00091 utf8 += static_cast<char>((0xC0 | (c >> 6)));
00092 utf8 += static_cast<char>((0x80 | (c & 0x3F)));
00093 }
00094 }
00095 return utf8;
00096 }
00097
00098 }
00099
00100
00101
00102
00103 std::string transcode(const std::string& str,
00104 Character_Set to, Character_Set from)
00105 {
00106 if(to == LOCAL_CHARSET)
00107 to = LATIN1_CHARSET;
00108 if(from == LOCAL_CHARSET)
00109 from = LATIN1_CHARSET;
00110
00111 if(to == from)
00112 return str;
00113
00114 if(from == LATIN1_CHARSET && to == UTF8_CHARSET)
00115 return latin1_to_utf8(str);
00116 if(from == UTF8_CHARSET && to == LATIN1_CHARSET)
00117 return utf8_to_latin1(str);
00118 if(from == UCS2_CHARSET && to == LATIN1_CHARSET)
00119 return ucs2_to_latin1(str);
00120
00121 throw Invalid_Argument("Unknown transcoding operation from " +
00122 to_string(from) + " to " + to_string(to));
00123 }
00124
00125
00126
00127
/*
* Return true if c is one of the characters '0' through '9'.
*/
bool is_digit(char c)
   {
   switch(c)
      {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
         return true;
      default:
         return false;
      }
   }
00135
00136
00137
00138
/*
* Return true if c is a space, horizontal tab, newline or carriage
* return; false for every other character.
*/
bool is_space(char c)
   {
   return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
   }
00145
00146
00147
00148
00149 byte char2digit(char c)
00150 {
00151 switch(c)
00152 {
00153 case '0': return 0;
00154 case '1': return 1;
00155 case '2': return 2;
00156 case '3': return 3;
00157 case '4': return 4;
00158 case '5': return 5;
00159 case '6': return 6;
00160 case '7': return 7;
00161 case '8': return 8;
00162 case '9': return 9;
00163 }
00164
00165 throw Invalid_Argument("char2digit: Input is not a digit character");
00166 }
00167
00168
00169
00170
00171 char digit2char(byte b)
00172 {
00173 switch(b)
00174 {
00175 case 0: return '0';
00176 case 1: return '1';
00177 case 2: return '2';
00178 case 3: return '3';
00179 case 4: return '4';
00180 case 5: return '5';
00181 case 6: return '6';
00182 case 7: return '7';
00183 case 8: return '8';
00184 case 9: return '9';
00185 }
00186
00187 throw Invalid_Argument("digit2char: Input is not a digit");
00188 }
00189
00190
00191
00192
/*
* Compare two characters, ignoring case: returns true if both map to the
* same value under std::tolower.
*/
bool caseless_cmp(char a, char b)
   {
   // Go through unsigned char so negative char values are not passed
   // to std::tolower.
   const int lower_a = std::tolower(static_cast<unsigned char>(a));
   const int lower_b = std::tolower(static_cast<unsigned char>(b));

   return (lower_a == lower_b);
   }
00198
00199 }
00200
00201 }