decancer/
codepoints.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#[cfg(feature = "options")]
use crate::Options;
use crate::{
  similar::SIMILAR_START,
  translation::Translation,
  util::{Binary, CODEPOINT_MASK},
};
use std::cmp::Ordering;

/// The codepoint table, embedded at compile time from `../bin/codepoints.bin`
/// and wrapped in a `Binary` so it can be read in `const` contexts.
pub(crate) const CODEPOINTS: Binary = Binary::new(include_bytes!("../bin/codepoints.bin"));

/// Derived from the distance between `SIMILAR_START` and
/// `CASE_SENSITIVE_CODEPOINTS_OFFSET`, divided by 6.
///
/// NOTE(review): 6 matches the number of bytes `Codepoint::at` reads per entry
/// (a `u32` plus two bytes). The trailing `- 1` presumably turns the entry
/// count into the highest valid index -- confirm against the callers.
pub(crate) const CASE_SENSITIVE_CODEPOINTS_COUNT: u16 =
  ((SIMILAR_START - CASE_SENSITIVE_CODEPOINTS_OFFSET) / 6) - 1;
/// Offset of the case-sensitive section, read from the table itself via
/// `CODEPOINTS.u16_at(0)`.
pub(crate) const CASE_SENSITIVE_CODEPOINTS_OFFSET: u16 = CODEPOINTS.u16_at(0);
/// Derived from `CASE_SENSITIVE_CODEPOINTS_OFFSET` the same way as
/// `CASE_SENSITIVE_CODEPOINTS_COUNT`.
///
/// NOTE(review): the inner `- 6` presumably skips a 6-byte header that precedes
/// the first entry -- confirm against the generator of `codepoints.bin`.
pub(crate) const CODEPOINTS_COUNT: u16 = ((CASE_SENSITIVE_CODEPOINTS_OFFSET - 6) / 6) - 1;

/// Flag bit in a `Codepoint`'s packed `u32`: when set, `Codepoint::range_end`
/// returns `Some(..)` and `Codepoint::matches` widens the match accordingly.
const RANGE_MASK: u32 = 0x0800_0000;
/// Flag bit in a `Codepoint`'s packed `u32`: when set, `Codepoint::translation`
/// builds its result with `Translation::string` instead of a single character.
const STRING_TRANSLATION_MASK: u32 = 0x1000_0000;

/// One entry decoded from `CODEPOINTS`: a packed `u32` followed by two bytes.
///
/// Field usage, as read by the methods of this type:
///
/// * `.0` -- packed word. Masking with `CODEPOINT_MASK` yields the codepoint,
///   bits 20..=26 hold the ASCII translation, and `RANGE_MASK` /
///   `STRING_TRANSLATION_MASK` are flag bits.
/// * `.1` -- the low seven bits are the span added to the codepoint when
///   `RANGE_MASK` is set; the high bit marks a "synced" translation. For string
///   translations the whole byte is handed to `Translation::string`.
/// * `.2` -- passed to `Options::refuse_cure`. It is only read when the
///   `options` feature is enabled, hence the conditional `allow(dead_code)`.
#[derive(Copy, Clone)]
#[cfg_attr(not(feature = "options"), allow(dead_code))]
pub(crate) struct Codepoint(u32, u8, u8);

impl Codepoint {
  /// The codepoint stored in this entry: the packed word with everything
  /// outside `CODEPOINT_MASK` stripped.
  const fn get_codepoint(self) -> u32 {
    let packed = self.0;

    packed & CODEPOINT_MASK
  }

  /// The span covered by this entry beyond its own codepoint.
  ///
  /// Returns the low seven bits of the second field when the `RANGE_MASK` flag
  /// is set, and `None` otherwise. Despite the name, `matches` *adds* this value
  /// to the starting codepoint, so it behaves as a length rather than an
  /// absolute end.
  const fn range_end(self) -> Option<u32> {
    match self.0 & RANGE_MASK {
      0 => None,
      _ => Some((self.1 & 0x7f) as u32),
    }
  }

  /// Whether the `STRING_TRANSLATION_MASK` flag is set on the packed word.
  const fn is_string_translation(self) -> bool {
    let flag = self.0 & STRING_TRANSLATION_MASK;

    flag != 0
  }

  /// The seven-bit value stored in bits 20..=26 of the packed word.
  const fn ascii_translation(self) -> u32 {
    let shifted = self.0 >> 20;

    shifted & 0x7f
  }

  /// Whether the high bit of the second field is set. When it is,
  /// `translation` shifts the ASCII translation by the input's distance from
  /// this entry's codepoint.
  const fn is_translation_synced(self) -> bool {
    (self.1 & 0x80) != 0
  }

  /// Decodes the entry located at `offset` in `CODEPOINTS`: a `u32` read at
  /// `offset`, followed by the single values at `offset + 4` and `offset + 5`.
  pub(crate) const fn at(offset: i32) -> Self {
    let packed = CODEPOINTS.u32_at(offset as _);
    let second = CODEPOINTS.at((offset + 4) as _);
    let third = CODEPOINTS.at((offset + 5) as _);

    Self(packed, second, third)
  }

  /// Compares `other` against the codepoint (or span of codepoints) this entry
  /// covers.
  ///
  /// * `Some(Ordering::Less)` -- `other` lies below the entry's codepoint.
  /// * `Some(Ordering::Greater)` -- `other` lies above the entry's codepoint
  ///   plus its span (the span is zero when `range_end` is `None`).
  /// * `None` -- `other` is covered, but `options.refuse_cure` rejected the
  ///   entry's third field (only with the `options` feature).
  /// * `Some(Ordering::Equal)` -- `other` is covered and accepted.
  pub(crate) const fn matches(
    self,
    other: u32,
    #[cfg(feature = "options")] options: Options,
  ) -> Option<Ordering> {
    let first = self.get_codepoint();

    if other < first {
      return Some(Ordering::Less);
    }

    // The span is only looked up once `other` is known not to be below the entry.
    let last = match self.range_end() {
      Some(span) => first + span,
      None => first,
    };

    if other > last {
      return Some(Ordering::Greater);
    }

    #[cfg(feature = "options")]
    if options.refuse_cure(self.2) {
      return None;
    }

    Some(Ordering::Equal)
  }

  /// Builds the translation of `other` according to this entry.
  ///
  /// String entries delegate to `Translation::string` with the first two
  /// fields. Otherwise an ASCII translation of zero yields
  /// `Translation::None`, and any other value becomes a
  /// `Translation::character`, shifted by `other - codepoint` when the entry is
  /// flagged as synced.
  pub(crate) fn translation(self, other: u32) -> Translation {
    if self.is_string_translation() {
      return Translation::string(self.0, self.1);
    }

    match self.ascii_translation() {
      0 => Translation::None,
      base if self.is_translation_synced() => {
        // Keep the input's position within the span when mapping to ASCII.
        Translation::character(base + (other - self.get_codepoint()))
      },
      base => Translation::character(base),
    }
  }
}