tendril/
utf8_decode.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use fmt;
use tendril::{Atomicity, Tendril};
use utf8;

pub struct IncompleteUtf8(utf8::Incomplete);

impl<A> Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8>
    where
        F: FnMut(Tendril<fmt::UTF8, A>),
    {
        loop {
            if self.is_empty() {
                return None;
            }
            let unborrowed_result = match utf8::decode(&self) {
                Ok(s) => {
                    debug_assert!(s.as_ptr() == self.as_ptr());
                    debug_assert!(s.len() == self.len());
                    Ok(())
                }
                Err(utf8::DecodeError::Invalid {
                    valid_prefix,
                    invalid_sequence,
                    ..
                }) => {
                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
                    debug_assert!(valid_prefix.len() <= self.len());
                    Err((
                        valid_prefix.len(),
                        Err(valid_prefix.len() + invalid_sequence.len()),
                    ))
                }
                Err(utf8::DecodeError::Incomplete {
                    valid_prefix,
                    incomplete_suffix,
                }) => {
                    debug_assert!(valid_prefix.as_ptr() == self.as_ptr());
                    debug_assert!(valid_prefix.len() <= self.len());
                    Err((valid_prefix.len(), Ok(incomplete_suffix)))
                }
            };
            match unborrowed_result {
                Ok(()) => {
                    unsafe { push_utf8(self.reinterpret_without_validating()) }
                    return None;
                }
                Err((valid_len, and_then)) => {
                    if valid_len > 0 {
                        let subtendril = self.subtendril(0, valid_len as u32);
                        unsafe { push_utf8(subtendril.reinterpret_without_validating()) }
                    }
                    match and_then {
                        Ok(incomplete) => return Some(IncompleteUtf8(incomplete)),
                        Err(offset) => {
                            push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER));
                            self.pop_front(offset as u32)
                        }
                    }
                }
            }
        }
    }
}

impl IncompleteUtf8 {
    pub fn try_complete<A, F>(
        &mut self,
        mut input: Tendril<fmt::Bytes, A>,
        mut push_utf8: F,
    ) -> Result<Tendril<fmt::Bytes, A>, ()>
    where
        A: Atomicity,
        F: FnMut(Tendril<fmt::UTF8, A>),
    {
        let resume_at;
        match self.0.try_complete(&input) {
            None => return Err(()),
            Some((result, rest)) => {
                push_utf8(Tendril::from_slice(
                    result.unwrap_or(utf8::REPLACEMENT_CHARACTER),
                ));
                resume_at = input.len() - rest.len();
            }
        }
        input.pop_front(resume_at as u32);
        Ok(input)
    }
}