// utf8/lossy.rs
use super::*;
/// A lossy, push-based UTF-8 decoder.
///
/// Callers hand raw bytes to the decoder chunk by chunk; the decoder forwards the decoded
/// text to a callback as `&str` slices. Wherever the input is not valid UTF-8,
/// the U+FFFD replacement character is emitted in its place.
///
/// As an illustration, an equivalent of `String::from_utf8_lossy`
/// (returning `String` rather than `Cow`) can be written like this:
///
/// ```rust
/// fn string_from_utf8_lossy(input: &[u8]) -> String {
///     let mut string = String::new();
///     utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input);
///     string
/// }
/// ```
///
/// **Note:** Dropping the decoder marks the end of the input.
/// If the final chunk stopped in the middle of a code point's byte sequence,
/// that is treated as an error and one replacement character is passed to the callback.
/// Use `std::mem::forget` on the decoder to suppress this.
pub struct LossyDecoder<F>
where
    F: FnMut(&str),
{
    /// Callback that receives every decoded slice and every replacement character.
    push_str: F,
    /// Bytes of a code point that was started by an earlier chunk but is not finished yet.
    incomplete: Incomplete,
}
impl<F: FnMut(&str)> LossyDecoder<F> {
    /// Build a decoder that reports decoded text through `push_str`.
    ///
    /// The decoder starts with no pending bytes.
    #[inline]
    pub fn new(push_str: F) -> Self {
        let incomplete = Incomplete {
            buffer: [0; 4],
            buffer_len: 0,
        };
        LossyDecoder { push_str, incomplete }
    }

    /// Decode one chunk of input, lossily.
    ///
    /// Decoded text is passed to the callback as `&str` slices, usually in several calls;
    /// each invalid byte sequence is reported as a separate U+FFFD slice.
    /// If the whole chunk is absorbed into a code point that is still incomplete,
    /// the callback is not invoked at all.
    ///
    /// A code point whose bytes straddle this chunk and earlier ones is reassembled
    /// and decoded correctly.
    pub fn feed(&mut self, mut input: &[u8]) {
        // First, try to finish a code point left dangling by an earlier chunk.
        if self.incomplete.buffer_len > 0 {
            let (completed, rest) = match self.incomplete.try_complete(input) {
                Some(outcome) => outcome,
                // Still not enough bytes: everything was buffered, nothing to emit yet.
                None => return,
            };
            // A sequence that turned out to be invalid becomes a replacement character.
            (self.push_str)(completed.unwrap_or(REPLACEMENT_CHARACTER));
            input = rest;
        }

        // Then decode the rest, resuming after every invalid sequence.
        loop {
            let (prefix, rest) = match decode(input) {
                Ok(text) => {
                    (self.push_str)(text);
                    return;
                }
                Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
                    (self.push_str)(valid_prefix);
                    // Keep the truncated tail around for the next `feed` (or for `drop`).
                    self.incomplete = incomplete_suffix;
                    return;
                }
                Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
                    (valid_prefix, remaining_input)
                }
            };
            (self.push_str)(prefix);
            (self.push_str)(REPLACEMENT_CHARACTER);
            input = rest;
        }
    }
}
impl<F: FnMut(&str)> Drop for LossyDecoder<F> {
    /// Treat the drop as end of input: if bytes of an unfinished code point are
    /// still buffered, report them to the callback as one replacement character.
    #[inline]
    fn drop(&mut self) {
        let has_dangling_bytes = self.incomplete.buffer_len > 0;
        if has_dangling_bytes {
            (self.push_str)(REPLACEMENT_CHARACTER);
        }
    }
}