1use encoding::all::{GB18030, ISO_8859_1, ISO_8859_2, ISO_8859_3, ISO_8859_4, ISO_8859_5, UTF_8};
26use encoding::{DecoderTrap, EncoderTrap, Encoding, RawDecoder, StringWriter};
27use snafu::{Backtrace, Snafu};
28use std::borrow::Cow;
29use std::fmt::Debug;
30
31#[derive(Debug, Snafu)]
33#[non_exhaustive]
34pub enum EncodeTextError {
35 #[snafu(display("{}", message))]
39 EncodeCustom {
40 message: Cow<'static, str>,
42 backtrace: Backtrace,
44 },
45}
46
47#[derive(Debug, Snafu)]
49#[non_exhaustive]
50pub enum DecodeTextError {
51 #[snafu(display("{}", message))]
55 DecodeCustom {
56 message: Cow<'static, str>,
58 backtrace: Backtrace,
60 },
61}
62
63type EncodeResult<T> = Result<T, EncodeTextError>;
64type DecodeResult<T> = Result<T, DecodeTextError>;
65
66pub trait TextCodec {
69 fn name(&self) -> Cow<'static, str>;
77
78 fn decode(&self, text: &[u8]) -> DecodeResult<String>;
82
83 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>>;
87}
88
89impl<T: ?Sized> TextCodec for Box<T>
90where
91 T: TextCodec,
92{
93 fn name(&self) -> Cow<'static, str> {
94 self.as_ref().name()
95 }
96
97 fn decode(&self, text: &[u8]) -> DecodeResult<String> {
98 self.as_ref().decode(text)
99 }
100
101 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
102 self.as_ref().encode(text)
103 }
104}
105
106impl<'a, T: ?Sized> TextCodec for &'a T
107where
108 T: TextCodec,
109{
110 fn name(&self) -> Cow<'static, str> {
111 (**self).name()
112 }
113
114 fn decode(&self, text: &[u8]) -> DecodeResult<String> {
115 (**self).decode(text)
116 }
117
118 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
119 (**self).encode(text)
120 }
121}
122
123#[derive(Debug, Default, Clone, PartialEq)]
140pub struct SpecificCharacterSet(CharsetImpl);
141
142impl SpecificCharacterSet {
143 pub const ISO_IR_6: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::Default);
145
146 pub const ISO_IR_100: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr100);
148
149 pub const ISO_IR_192: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr192);
151
152 pub fn from_code(code: &str) -> Option<Self> {
166 CharsetImpl::from_code(code).map(SpecificCharacterSet)
167 }
168}
169
170impl TextCodec for SpecificCharacterSet {
171 fn name(&self) -> Cow<'static, str> {
172 self.0.name()
173 }
174
175 fn decode(&self, text: &[u8]) -> DecodeResult<String> {
176 self.0.decode(text)
177 }
178
179 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
180 self.0.encode(text)
181 }
182}
183
/// The character sets this module can handle.
///
/// Variants are declared in the order that the derived `Ord` relies on.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
#[non_exhaustive]
enum CharsetImpl {
    /// The default character set, named `ISO_IR 6`.
    #[default]
    Default,
    /// `ISO_IR 100`.
    IsoIr100,
    /// `ISO_IR 101`.
    IsoIr101,
    /// `ISO_IR 109`.
    IsoIr109,
    /// `ISO_IR 110`.
    IsoIr110,
    /// `ISO_IR 144`.
    IsoIr144,
    /// `ISO_IR 192`.
    IsoIr192,
    /// `GB18030`.
    Gb18030,
}
211
212impl CharsetImpl {
213 pub fn from_code(uid: &str) -> Option<Self> {
218 use self::CharsetImpl::*;
219 match uid.trim_end() {
220 "Default" | "ISO_IR_6" | "ISO_IR 6" | "ISO 2022 IR 6" => Some(Default),
221 "ISO_IR_100" | "ISO_IR 100" | "ISO 2022 IR 100" => Some(IsoIr100),
222 "ISO_IR_101" | "ISO_IR 101" | "ISO 2022 IR 101" => Some(IsoIr101),
223 "ISO_IR_109" | "ISO_IR 109" | "ISO 2022 IR 109" => Some(IsoIr109),
224 "ISO_IR_110" | "ISO_IR 110" | "ISO 2022 IR 110" => Some(IsoIr110),
225 "ISO_IR_144" | "ISO_IR 144" | "ISO 2022 IR 144" => Some(IsoIr144),
226 "ISO_IR_192" | "ISO_IR 192" => Some(IsoIr192),
227 "GB18030" => Some(Gb18030),
228 _ => None,
229 }
230 }
231}
232
233impl TextCodec for CharsetImpl {
234 fn name(&self) -> Cow<'static, str> {
235 Cow::Borrowed(match self {
236 CharsetImpl::Default => "ISO_IR 6",
237 CharsetImpl::IsoIr100 => "ISO_IR 100",
238 CharsetImpl::IsoIr101 => "ISO_IR 101",
239 CharsetImpl::IsoIr109 => "ISO_IR 109",
240 CharsetImpl::IsoIr110 => "ISO_IR 110",
241 CharsetImpl::IsoIr144 => "ISO_IR 144",
242 CharsetImpl::IsoIr192 => "ISO_IR 192",
243 CharsetImpl::Gb18030 => "GB18030",
244 })
245 }
246
247 fn decode(&self, text: &[u8]) -> DecodeResult<String> {
248 match self {
249 CharsetImpl::Default => DefaultCharacterSetCodec.decode(text),
250 CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.decode(text),
251 CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.decode(text),
252 CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.decode(text),
253 CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.decode(text),
254 CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.decode(text),
255 CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.decode(text),
256 CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.decode(text),
257 }
258 }
259
260 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
261 match self {
262 CharsetImpl::Default => DefaultCharacterSetCodec.encode(text),
263 CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.encode(text),
264 CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.encode(text),
265 CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.encode(text),
266 CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.encode(text),
267 CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.encode(text),
268 CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.encode(text),
269 CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.encode(text),
270 }
271 }
272}
273
274fn decode_text_trap(
275 _decoder: &mut dyn RawDecoder,
276 input: &[u8],
277 output: &mut dyn StringWriter,
278) -> bool {
279 let c = input[0];
280 let o0 = c & 7;
281 let o1 = (c & 56) >> 3;
282 let o2 = (c & 192) >> 6;
283 output.write_char('\\');
284 output.write_char((o2 + b'0') as char);
285 output.write_char((o1 + b'0') as char);
286 output.write_char((o0 + b'0') as char);
287 true
288}
289
// Declares a public unit struct `$codec` implementing `TextCodec`:
// it reports `$label` as its name and hands the actual conversion
// over to the encoding value `$encoding`.
macro_rules! decl_character_set {
    ($codec:ident, $label:literal, $encoding:expr) => {
        #[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
        #[doc = "Data type for the "]
        #[doc = $label]
        #[doc = "character set encoding."]
        pub struct $codec;

        impl TextCodec for $codec {
            fn name(&self) -> Cow<'static, str> {
                Cow::Borrowed($label)
            }

            fn decode(&self, text: &[u8]) -> DecodeResult<String> {
                // Undecodable input goes through `decode_text_trap`
                // instead of being rejected outright.
                let decoded = $encoding.decode(text, DecoderTrap::Call(decode_text_trap));
                decoded.map_err(|message| DecodeCustomSnafu { message }.build())
            }

            fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
                // Strict: text which the encoding cannot represent is an error.
                let encoded = $encoding.encode(text, EncoderTrap::Strict);
                encoded.map_err(|message| EncodeCustomSnafu { message }.build())
            }
        }
    };
}
316
/// Codec for the default character set, named `ISO_IR 6`.
///
/// The type carries no data; all values of it are equal.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
pub struct DefaultCharacterSetCodec;
320
321impl TextCodec for DefaultCharacterSetCodec {
322 fn name(&self) -> Cow<'static, str> {
323 Cow::Borrowed("ISO_IR 6")
324 }
325
326 fn decode(&self, text: &[u8]) -> DecodeResult<String> {
327 ISO_8859_1
330 .decode(text, DecoderTrap::Call(decode_text_trap))
331 .map_err(|message| DecodeCustomSnafu { message }.build())
332 }
333
334 fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
335 ISO_8859_1
336 .encode(text, EncoderTrap::Strict)
337 .map_err(|message| EncodeCustomSnafu { message }.build())
338 }
339}
340
341decl_character_set!(IsoIr100CharacterSetCodec, "ISO_IR 100", ISO_8859_1);
342decl_character_set!(IsoIr101CharacterSetCodec, "ISO_IR 101", ISO_8859_2);
343decl_character_set!(IsoIr109CharacterSetCodec, "ISO_IR 109", ISO_8859_3);
344decl_character_set!(IsoIr110CharacterSetCodec, "ISO_IR 110", ISO_8859_4);
345decl_character_set!(IsoIr144CharacterSetCodec, "ISO_IR 144", ISO_8859_5);
346decl_character_set!(Utf8CharacterSetCodec, "ISO_IR 192", UTF_8);
347decl_character_set!(Gb18030CharacterSetCodec, "GB18030", GB18030);
348
/// The verdict of one of the `validate_*` functions.
///
/// Variants are declared from best to worst,
/// which is also the order given by the derived `Ord`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum TextValidationOutcome {
    /// The text passed the check.
    Ok,
    /// The text only decodes when undecodable parts are escaped.
    BadCharacters,
    /// The text failed the check.
    NotOk,
}
359
360pub fn validate_iso_8859(text: &[u8]) -> TextValidationOutcome {
362 if ISO_8859_1.decode(text, DecoderTrap::Strict).is_err() {
363 match ISO_8859_1.decode(text, DecoderTrap::Call(decode_text_trap)) {
364 Ok(_) => TextValidationOutcome::BadCharacters,
365 Err(_) => TextValidationOutcome::NotOk,
366 }
367 } else {
368 TextValidationOutcome::Ok
369 }
370}
371
372pub fn validate_da(text: &[u8]) -> TextValidationOutcome {
375 if text.iter().cloned().all(|c| c.is_ascii_digit()) {
376 TextValidationOutcome::Ok
377 } else {
378 TextValidationOutcome::NotOk
379 }
380}
381
382pub fn validate_tm(text: &[u8]) -> TextValidationOutcome {
385 if text.iter().cloned().all(|c| match c {
386 b'\\' | b'.' | b'-' | b' ' => true,
387 c => c.is_ascii_digit(),
388 }) {
389 TextValidationOutcome::Ok
390 } else {
391 TextValidationOutcome::NotOk
392 }
393}
394
395pub fn validate_dt(text: &[u8]) -> TextValidationOutcome {
398 if text.iter().cloned().all(|c| match c {
399 b'.' | b'-' | b'+' | b' ' | b'\\' => true,
400 c => c.is_ascii_digit(),
401 }) {
402 TextValidationOutcome::Ok
403 } else {
404 TextValidationOutcome::NotOk
405 }
406}
407
408pub fn validate_cs(text: &[u8]) -> TextValidationOutcome {
411 if text.iter().cloned().all(|c| match c {
412 b' ' | b'_' => true,
413 c => c.is_ascii_digit() || c.is_ascii_uppercase(),
414 }) {
415 TextValidationOutcome::Ok
416 } else {
417 TextValidationOutcome::NotOk
418 }
419}
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Check both directions of a codec:
    /// `string` must encode to `bytes`, and `bytes` must decode to `string`.
    fn test_codec(codec: impl TextCodec, string: &str, bytes: &[u8]) {
        let encoded = codec.encode(string).expect("encoding");
        assert_eq!(encoded, bytes);

        let decoded = codec.decode(bytes).expect("decoding");
        assert_eq!(decoded, string);
    }

    #[test]
    fn iso_ir_6_baseline() {
        test_codec(SpecificCharacterSet::default(), "Smith^John", b"Smith^John");
    }

    #[test]
    fn iso_ir_192_baseline() {
        let codec = SpecificCharacterSet::ISO_IR_192;
        // the codec is also usable through a reference
        test_codec(&codec, "Simões^John", "Simões^John".as_bytes());
        test_codec(codec, "Иванков^Андрей", "Иванков^Андрей".as_bytes());
    }

    #[test]
    fn iso_ir_100_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr100);
        // the codec is also usable through a reference
        test_codec(&codec, "Simões^João", b"Sim\xF5es^Jo\xE3o");
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_101_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr101);
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_144_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr144);
        let bytes = b"\xb8\xd2\xd0\xdd\xda\xde\xd2^\xb0\xdd\xd4\xe0\xd5\xd9";
        test_codec(codec, "Иванков^Андрей", bytes);
    }
}