cfb/internal/
path.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
use std::cmp::Ordering;
use std::io;
use std::path::{Component, Path, PathBuf};

// ========================================================================= //

const MAX_NAME_LEN: usize = 31;

// ========================================================================= //

/// Compares two directory entry names according to CFB ordering, which is
/// case-insensitive, and which always puts shorter names before longer names,
/// as encoded in UTF-16 (i.e. [shortlex
/// order](https://en.wikipedia.org/wiki/Shortlex_order), rather than
/// dictionary order).
pub fn compare_names(name1: &str, name2: &str) -> Ordering {
    match name1.encode_utf16().count().cmp(&name2.encode_utf16().count()) {
        // This is actually not 100% correct -- the MS-CFB spec specifies a
        // particular way of doing the uppercasing on individual UTF-16 code
        // units, along with a list of weird exceptions and corner cases.  But
        // hopefully this is good enough for 99+% of the time.
        Ordering::Equal => name1.to_uppercase().cmp(&name2.to_uppercase()),
        other => other,
    }
}

/// Converts a storage/stream name to UTF-16, or returns an error if the name
/// is invalid.
pub fn validate_name(name: &str) -> io::Result<Vec<u16>> {
    let name_utf16: Vec<u16> =
        name.encode_utf16().take(MAX_NAME_LEN + 1).collect();
    if name_utf16.len() > MAX_NAME_LEN {
        invalid_input!(
            "Object name cannot be more than {} UTF-16 code units \
                        (was {})",
            MAX_NAME_LEN,
            name.encode_utf16().count()
        );
    }
    for &chr in &['/', '\\', ':', '!'] {
        if name.contains(chr) {
            invalid_input!("Object name cannot contain {} character", chr);
        }
    }
    Ok(name_utf16)
}

// ========================================================================= //

/// Given a path within a compound file, turns it into a list of child names
/// descending from the root.  Returns an error if the name is invalid.
pub fn name_chain_from_path(path: &Path) -> io::Result<Vec<&str>> {
    let mut names: Vec<&str> = Vec::new();
    for component in path.components() {
        match component {
            Component::Prefix(_) => {
                invalid_input!("Invalid path (must not have prefix)");
            }
            Component::RootDir => names.clear(),
            Component::CurDir => {}
            Component::ParentDir => {
                if names.pop().is_none() {
                    invalid_input!("Invalid path (must be within root)");
                }
            }
            Component::Normal(osstr) => match osstr.to_str() {
                Some(name) => names.push(name),
                None => invalid_input!("Non UTF-8 path"),
            },
        }
    }
    Ok(names)
}

pub fn path_from_name_chain(names: &[&str]) -> PathBuf {
    let mut path = PathBuf::from("/");
    for name in names {
        path.push(name);
    }
    path
}

// ========================================================================= //

#[cfg(test)]
mod tests {
    use super::{
        compare_names, name_chain_from_path, path_from_name_chain,
        validate_name,
    };
    use std::cmp::Ordering;
    use std::path::{Path, PathBuf};

    #[test]
    fn name_ordering() {
        assert_eq!(compare_names("foobar", "FOOBAR"), Ordering::Equal);
        assert_eq!(compare_names("foo", "barfoo"), Ordering::Less);
        assert_eq!(compare_names("Foo", "bar"), Ordering::Greater);
    }

    #[test]
    fn short_name_is_valid() {
        assert_eq!(
            validate_name("Foobar").unwrap(),
            vec![70, 111, 111, 98, 97, 114]
        );
    }

    #[test]
    #[should_panic(
        expected = "Object name cannot be more than 31 UTF-16 code \
                               units (was 35)"
    )]
    fn long_name_is_invalid() {
        validate_name("ThisNameIsMostDefinitelyMuchTooLong").unwrap();
    }

    #[test]
    #[should_panic(expected = "Object name cannot contain / character")]
    fn name_with_slash_is_invalid() {
        validate_name("foo/bar").unwrap();
    }

    #[test]
    fn absolute_path_is_valid() {
        assert_eq!(
            name_chain_from_path(&Path::new("/foo/bar/baz/")).unwrap(),
            vec!["foo", "bar", "baz"]
        );
    }

    #[test]
    fn relative_path_is_valid() {
        assert_eq!(
            name_chain_from_path(&Path::new("foo/bar/baz")).unwrap(),
            vec!["foo", "bar", "baz"]
        );
    }

    #[test]
    fn path_with_parents_is_valid() {
        assert_eq!(
            name_chain_from_path(&Path::new("foo/bar/../baz")).unwrap(),
            vec!["foo", "baz"]
        );
    }

    #[test]
    #[should_panic(expected = "Invalid path (must be within root)")]
    fn parent_of_root_is_invalid() {
        name_chain_from_path(&Path::new("foo/../../baz")).unwrap();
    }

    #[test]
    fn canonical_path_is_absolute() {
        let path = Path::new("foo/bar/../baz");
        let names = name_chain_from_path(&path).unwrap();
        assert_eq!(path_from_name_chain(&names), PathBuf::from("/foo/baz"));
    }
}

// ========================================================================= //