string_cache/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//!
//! A library for interning things that are `AsRef<str>`.
//!
//! Some strings may be interned at compile time using the `string-cache-codegen` crate.
//! Alternatively, `EmptyStaticAtomSet` may be used, which has no compile-time interned strings.
//! An `Atom` is an interned string for a given set (either `EmptyStaticAtomSet` or a generated
//! `StaticAtomSet`).
//!
//! Generated `Atom`s will have associated macros to intern static strings at compile-time.
//!
//! # Examples
//!
//! Here are two examples, one with compile-time `Atom`s, and one without.
//!
//! ## With compile-time atoms
//!
//! In `Cargo.toml`:
//! ```toml
//! [dependencies]
//! string_cache = "0.8"
//!
//! [build-dependencies]
//! string_cache_codegen = "0.5"
//! ```
//!
//! In `build.rs`:
//!
//! ```ignore
//! extern crate string_cache_codegen;
//!
//! use std::env;
//! use std::path::Path;
//!
//! fn main() {
//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!")
//! .atoms(&["foo", "bar"])
//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))
//! .unwrap()
//! }
//! ```
//!
//! In `lib.rs`:
//!
//! ```ignore
//! extern crate string_cache;
//!
//! mod foo {
//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs"));
//! }
//!
//! fn use_the_atom(t: &str) {
//! match *t {
//! foo_atom!("foo") => println!("Found foo!"),
//! foo_atom!("bar") => println!("Found bar!"),
//! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error
//! _ => {
//! println!("String not interned");
//! // We can intern strings at runtime as well
//! foo::FooAtom::from(t)
//! }
//! }
//! }
//! ```
//!
//! ## No compile-time atoms
//!
//! ```
//! # extern crate string_cache;
//! use string_cache::DefaultAtom;
//!
//! # fn main() {
//! let mut interned_stuff = Vec::new();
//! let text = "here is a sentence of text that will be tokenised and
//! interned and some repeated tokens is of text and";
//! for word in text.split_whitespace() {
//! let seen_before = interned_stuff.iter()
//! // We can use impl PartialEq<T> where T is anything string-like
//! // to compare interned strings to either other interned strings
//! // or actual strings. Comparing two interned strings is very fast
//! // (normally a single cpu operation).
//! .filter(|interned_word| interned_word == &word)
//! .count();
//! if seen_before > 0 {
//! println!(r#"Seen the word "{}" {} times"#, word, seen_before);
//! } else {
//! println!(r#"Not seen the word "{}" before"#, word);
//! }
//! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for
//! // Atom<Static> to intern a new string.
//! interned_stuff.push(DefaultAtom::from(word));
//! }
//! # }
//! ```
//!
#![cfg_attr(test, deny(warnings))]
// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match
// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while
// manually impl-ing the other, because it seems easy for the two to drift apart, causing the
// invariant to be violated.
//
// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and
// copying are this library's purpose. So we know what the PartialEq comparison is going to do.
//
// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner
// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to
// differ would be if the table entry changed between invocations, and that would be really bad.
#![allow(clippy::derive_hash_xor_eq)]
// Implementation modules, all private to the crate; their items are exposed through the
// `pub use` re-exports below.
mod atom;
mod dynamic_set;
mod static_sets;
mod trivial_impls;
// Public re-exports of the atom type and the static-set items.
pub use atom::Atom;
pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet};
/// An [`Atom`] over [`EmptyStaticAtomSet`], i.e. one with no compile-time interned strings.
///
/// Use this if you don’t care about static atoms.
pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
// Some minor tests of internal layout here.
// See ../integration-tests for much more.
/// Layout regression check: a `DefaultAtom` must occupy exactly eight bytes, and wrapping it
/// in `Option` must not make it any larger.
#[test]
fn assert_sizes() {
    use std::mem;

    let atom_bytes = mem::size_of::<DefaultAtom>();
    let optional_atom_bytes = mem::size_of::<Option<DefaultAtom>>();

    // The atom itself is a single 64-bit word.
    assert_eq!(atom_bytes, 8);
    // `Option<DefaultAtom>` must be no bigger than the atom it wraps.
    assert_eq!(optional_atom_bytes, atom_bytes);
}