markup5ever/util/smallcharset.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This module contains a single struct [`SmallCharSet`]. See its documentation for details.
//!
//! [`SmallCharSet`]: struct.SmallCharSet.html
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// This is stored as a bitmap, with 1 bit for each value.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct SmallCharSet {
    /// The membership bitmap: bit `n` (counting from the least significant
    /// bit) is set exactly when the character with value `n` is in the set.
    pub bits: u64,
}

impl SmallCharSet {
    /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
    ///
    /// Values of 64 and above have no bit in the bitmap, so they are never
    /// members and this returns `false` for them.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// # use markup5ever::SmallCharSet;
    /// let set = SmallCharSet {
    ///     bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
    /// };
    /// assert!(set.contains(54));
    /// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
    /// assert!(!set.contains(64)); // out of range, never a member
    /// ```
    #[inline]
    fn contains(&self, n: u8) -> bool {
        // Guard the shift: shifting a `u64` by 64 or more is an overflow
        // (a panic in debug builds, a wrapped shift amount in release builds).
        n < 64 && (self.bits & (1u64 << n)) != 0
    }

    /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
    ///
    /// The scan stops at the first byte that is a member of the set; bytes of
    /// multi-byte UTF-8 sequences are all `>= 128` and therefore never members.
    /// The result saturates at `u32::MAX`.
    ///
    /// This functionality is used in [`BufferQueue::pop_except_from`].
    ///
    /// # Examples
    ///
    /// ```
    /// # #[macro_use] extern crate markup5ever;
    /// # fn main() {
    /// let set = small_char_set!(48 49 50); // '0' '1' '2'
    /// // `test` is 4 bytes, U+1F601 is 4 bytes, then we meet a character in the set
    /// let test_str = "test\u{1F601}01232afd";
    /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
    /// # }
    /// ```
    ///
    /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
    pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
        let len = buf.bytes().take_while(|&b| !self.contains(b)).count();
        // Clamp before narrowing so an oversized count cannot silently wrap.
        len.min(u32::MAX as usize) as u32
    }
}
#[cfg(test)]
mod test {
    // Places a single set member after `before` filler bytes and ahead of
    // `after` filler bytes, and checks that the reported prefix length is
    // exactly the number of leading filler bytes.
    #[test]
    fn nonmember_prefix() {
        let set = small_char_set!('&' '\0');
        for &member in &['&', '\0'] {
            for before in 0..48u32 {
                let head = "x".repeat(before as usize);
                for after in 0..48usize {
                    let input = format!("{}{}{}", head, member, "x".repeat(after));
                    assert_eq!(before, set.nonmember_prefix_len(&input));
                }
            }
        }
    }
}