litrs/lib.rs
1//! Parsing and inspecting Rust literal tokens.
2//!
3//! This library offers functionality to parse Rust literals, i.e. tokens in the
4//! Rust programming language that represent fixed values. The grammar for
5//! those is defined [here][ref].
6//!
7//! This kind of functionality already exists in the crate `syn`. However, as
8//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
9//! built. This crate also offers a bit more flexibility compared to `syn`
10//! (only regarding literals, of course).
11//!
12//!
13//! # Quick start
14//!
15//! | **`StringLit::try_from(tt)?.value()`** |
16//! | - |
17//!
18//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
19//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
20//! Calling `value()` returns the value that is represented by the literal.
21//!
22//! **Mini Example**
23//!
24//! ```ignore
25//! use proc_macro::TokenStream;
26//!
27//! #[proc_macro]
28//! pub fn foo(input: TokenStream) -> TokenStream {
29//! let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
30//! let string_value = match litrs::StringLit::try_from(first_token) {
31//! Ok(string_lit) => string_lit.value(),
32//! Err(e) => return e.to_compile_error(),
33//! };
34//!
35//! // `string_value` is the string value with all escapes resolved.
36//! todo!()
37//! }
38//! ```
39//!
40//! # Overview
41//!
42//! The main types of this library are [`Literal`], representing any kind of
43//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
44//! specific kind of literal.
45//!
46//! There are different ways to obtain such a literal type:
47//!
48//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,
49//! ParseError>`. For example: [`Literal::parse`] and
50//! [`IntegerLit::parse`].
51//!
52//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
53//! the `proc_macro` crate into a `Literal` from this crate.
54//!
55//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
56//! `proc_macro::Literal` into a specific literal type of this crate. If
57//! the input is a literal of a different kind, `Err(InvalidToken)` is
58//! returned.
59//!
60//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a
61//! literal type of this crate. An error is returned if the token tree is
62//! not a literal, or if you are trying to turn it into a specific kind of
63//! literal and the token tree is a different kind of literal.
64//!
//! All of the `From` and `TryFrom` conversions also work for references to
66//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
67//! enabled (which it is by default), all these `From` and `TryFrom` impls also
68//! exist for the corresponding `proc_macro2` types.
69//!
70//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
71//! The `TryFrom<TokenTree>` impls check for those two special idents and
72//! return a [`BoolLit`] appropriately. For that reason, there is also no
73//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
74//! simply cannot represent bool literals.
75//!
76//!
77//! # Examples
78//!
79//! In a proc-macro:
80//!
81//! ```ignore
82//! use std::convert::TryFrom;
83//! use proc_macro::TokenStream;
84//! use litrs::FloatLit;
85//!
86//! #[proc_macro]
87//! pub fn foo(input: TokenStream) -> TokenStream {
88//! let mut input = input.into_iter().collect::<Vec<_>>();
89//! if input.len() != 1 {
90//! // Please do proper error handling in your real code!
91//! panic!("expected exactly one token as input");
92//! }
93//! let token = input.remove(0);
94//!
95//! match FloatLit::try_from(token) {
96//! Ok(float_lit) => { /* do something */ }
97//! Err(e) => return e.to_compile_error(),
98//! }
99//!
100//! // Dummy output
101//! TokenStream::new()
102//! }
103//! ```
104//!
105//! Parsing from string:
106//!
107//! ```
108//! use litrs::{FloatLit, Literal};
109//!
110//! // Parse a specific kind of literal (float in this case):
111//! let float_lit = FloatLit::parse("3.14f32");
112//! assert!(float_lit.is_ok());
113//! assert_eq!(float_lit.unwrap().suffix(), "f32");
114//! assert!(FloatLit::parse("'c'").is_err());
115//!
116//! // Parse any kind of literal. After parsing, you can inspect the literal
117//! // and decide what to do in each case.
118//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
119//! match lit {
120//! Literal::Integer(lit) => { /* ... */ }
121//! Literal::Float(lit) => { /* ... */ }
122//! Literal::Bool(lit) => { /* ... */ }
123//! Literal::Char(lit) => { /* ... */ }
124//! Literal::String(lit) => { /* ... */ }
125//! Literal::Byte(lit) => { /* ... */ }
126//! Literal::ByteString(lit) => { /* ... */ }
127//! }
128//! ```
129//!
130//!
131//!
132//! # Crate features
133//!
134//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of
135//! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
136//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
137//! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
138//! only an approximate check (only in ASCII range) is done. If you are
139//! writing a proc macro, you don't need to enable this as the suffix is
140//! already checked by the compiler.
141//!
142//!
143//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
144//!
145
146#![deny(missing_debug_implementations)]
147
148extern crate proc_macro;
149
150#[cfg(test)]
151#[macro_use]
152mod test_util;
153
154#[cfg(test)]
155mod tests;
156
157mod bool;
158mod byte;
159mod bytestr;
160mod char;
161mod err;
162mod escape;
163mod float;
164mod impls;
165mod integer;
166mod parse;
167mod string;
168
169
170use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};
171
172pub use self::{
173 bool::BoolLit,
174 byte::ByteLit,
175 bytestr::ByteStringLit,
176 char::CharLit,
177 err::{InvalidToken, ParseError},
178 float::{FloatLit, FloatType},
179 integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
180 string::StringLit,
181};
182
183
184// ==============================================================================================
185// ===== `Literal` and type defs
186// ==============================================================================================
187
/// A literal. This is the main type of this library.
///
/// This type is generic over the underlying buffer `B`, which can be `&str` or
/// `String`.
///
/// To create this type, you have to either call [`Literal::parse`] with an
/// input string or use the `From<_>` impls of this type. The impls are only
/// available if the corresponding crate features are enabled (they are enabled
/// by default).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal<B: Buffer> {
    /// A bool literal; see [`BoolLit`]. This is the only variant that does not
    /// carry the buffer type `B`.
    Bool(BoolLit),
    /// An integer literal; see [`IntegerLit`].
    Integer(IntegerLit<B>),
    /// A float literal; see [`FloatLit`].
    Float(FloatLit<B>),
    /// A char literal; see [`CharLit`].
    Char(CharLit<B>),
    /// A string literal; see [`StringLit`].
    String(StringLit<B>),
    /// A byte literal; see [`ByteLit`].
    Byte(ByteLit<B>),
    /// A byte string literal; see [`ByteStringLit`].
    ByteString(ByteStringLit<B>),
}
207
208impl<B: Buffer> Literal<B> {
209 /// Parses the given input as a Rust literal.
210 pub fn parse(input: B) -> Result<Self, ParseError> {
211 parse::parse(input)
212 }
213
214 /// Returns the suffix of this literal or `""` if it doesn't have one.
215 ///
216 /// Rust token grammar actually allows suffixes for all kinds of tokens.
217 /// Most Rust programmer only know the type suffixes for integer and
218 /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an
219 /// error. But it is possible to pass literals with arbitrary suffixes to
220 /// proc macros, for example:
221 ///
222 /// ```ignore
223 /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong);
224 /// ```
225 ///
226 /// Boolean literals, not actually being literals, but idents, cannot have
227 /// suffixes and this method always returns `""` for those.
228 ///
229 /// There are some edge cases to be aware of:
230 /// - Integer suffixes must not start with `e` or `E` as that conflicts with
231 /// the exponent grammar for floats. `0e1` is a float; `0eel` is also
232 /// parsed as a float and results in an error.
233 /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a
234 /// suffix von `gh`.
235 /// - Suffixes can contain and start with `_`, but for integer and number
236 /// literals, `_` is eagerly parsed as part of the number, so `1_x` has
237 /// the suffix `x`.
238 /// - The input `55f32` is regarded as integer literal with suffix `f32`.
239 ///
240 /// # Example
241 ///
242 /// ```
243 /// use litrs::Literal;
244 ///
245 /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");
246 /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");
247 /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");
248 /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");
249 /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");
250 /// ```
251 pub fn suffix(&self) -> &str {
252 match self {
253 Literal::Bool(_) => "",
254 Literal::Integer(l) => l.suffix(),
255 Literal::Float(l) => l.suffix(),
256 Literal::Char(l) => l.suffix(),
257 Literal::String(l) => l.suffix(),
258 Literal::Byte(l) => l.suffix(),
259 Literal::ByteString(l) => l.suffix(),
260 }
261 }
262}
263
264impl Literal<&str> {
265 /// Makes a copy of the underlying buffer and returns the owned version of
266 /// `Self`.
267 pub fn into_owned(self) -> Literal<String> {
268 match self {
269 Literal::Bool(l) => Literal::Bool(l.to_owned()),
270 Literal::Integer(l) => Literal::Integer(l.to_owned()),
271 Literal::Float(l) => Literal::Float(l.to_owned()),
272 Literal::Char(l) => Literal::Char(l.to_owned()),
273 Literal::String(l) => Literal::String(l.into_owned()),
274 Literal::Byte(l) => Literal::Byte(l.to_owned()),
275 Literal::ByteString(l) => Literal::ByteString(l.into_owned()),
276 }
277 }
278}
279
280impl<B: Buffer> fmt::Display for Literal<B> {
281 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282 match self {
283 Literal::Bool(l) => l.fmt(f),
284 Literal::Integer(l) => l.fmt(f),
285 Literal::Float(l) => l.fmt(f),
286 Literal::Char(l) => l.fmt(f),
287 Literal::String(l) => l.fmt(f),
288 Literal::Byte(l) => l.fmt(f),
289 Literal::ByteString(l) => l.fmt(f),
290 }
291 }
292}
293
294
295// ==============================================================================================
296// ===== Buffer
297// ==============================================================================================
298
/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
///
/// This trait is an implementation detail of this library, cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that this trait is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;

    /// Cuts away some characters at the beginning and some at the end. Given
    /// range has to be in bounds.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;
}

/// Private module holding the supertrait that keeps [`Buffer`] from being
/// implemented outside of this crate.
mod sealed {
    pub trait Sealed {}
}

impl<'a> sealed::Sealed for &'a str {}
impl<'a> Buffer for &'a str {
    type Cow = Cow<'a, str>;
    type ByteCow = Cow<'a, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        // Borrowed input stays borrowed; nothing is copied.
        Cow::Borrowed(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Borrowed(self.as_bytes())
    }

    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self {
        let Range { start, end } = range;
        &self[start..end]
    }
}

impl sealed::Sealed for String {}
impl Buffer for String {
    type Cow = Cow<'static, str>;
    type ByteCow = Cow<'static, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        // The allocation is moved into the `Cow`, not cloned.
        Cow::Owned(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Owned(self.into_bytes())
    }

    #[doc(hidden)]
    fn cut(mut self, range: Range<usize>) -> Self {
        let Range { start, end } = range;

        // Not the most efficient approach, but it works: drop the tail first,
        // then the head. Dropping the `Drain` without iterating it still
        // removes the drained range from the string.
        self.truncate(end);
        drop(self.drain(..start));
        self
    }
}