litrs/integer/
mod.rs

1use std::{fmt, str::FromStr};
2
3use crate::{
4    Buffer, ParseError,
5    err::{perr, ParseErrorKind::*},
6    parse::{first_byte_or_empty, hex_digit_value, check_suffix},
7};
8
9
/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
///
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
/// the main part (digits and underscores), and an optional type suffix
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
///
/// Note that integer literals are never negative: the grammar does not
/// contain the minus sign at all. The minus sign is just the unary negate
/// operator, not part of the literal. This matters for cases like `- 128i8`:
/// here, the literal itself would overflow the specified type (`i8` cannot
/// represent 128). That's why in rustc, the literal overflow check is
/// performed as a lint after parsing, not during the lexing stage. Similarly,
/// [`IntegerLit::parse`] does not perform an overflow check.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct IntegerLit<B: Buffer> {
    /// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
    raw: B,
    /// Byte index into `raw` of the first character of the main number part
    /// (i.e. directly after the base prefix, or 0 if there is no prefix).
    start_main_part: usize,
    /// Byte index into `raw` of the first character that is no longer part of
    /// the main number part. This is also where the suffix (if any) starts.
    end_main_part: usize,
    /// The base denoted by the prefix, i.e. parsed `raw[..start_main_part]`.
    base: IntegerBase,
}
37
38impl<B: Buffer> IntegerLit<B> {
39    /// Parses the input as an integer literal. Returns an error if the input is
40    /// invalid or represents a different kind of literal.
41    pub fn parse(input: B) -> Result<Self, ParseError> {
42        match first_byte_or_empty(&input)? {
43            digit @ b'0'..=b'9' => {
44                // TODO: simplify once RFC 2528 is stabilized
45                let IntegerLit {
46                    start_main_part,
47                    end_main_part,
48                    base,
49                    ..
50                } =  parse_impl(&input, digit)?;
51
52                Ok(Self { raw: input, start_main_part, end_main_part, base })
53            },
54            _ => Err(perr(0, DoesNotStartWithDigit)),
55        }
56    }
57
58    /// Performs the actual string to int conversion to obtain the integer
59    /// value. The optional type suffix of the literal **is ignored by this
60    /// method**. This means `N` does not need to match the type suffix!
61    ///
62    /// Returns `None` if the literal overflows `N`.
63    ///
64    /// Hint: `u128` can represent all possible values integer literal values,
65    /// as there are no negative literals (see type docs). Thus you can, for
66    /// example, safely use `lit.value::<u128>().to_string()` to get a decimal
67    /// string. (Technically, Rust integer literals can represent arbitrarily
68    /// large numbers, but those would be rejected at a later stage by the Rust
69    /// compiler).
70    pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
71        let base = N::from_small_number(self.base.value());
72
73        let mut acc = N::from_small_number(0);
74        for digit in self.raw_main_part().bytes() {
75            if digit == b'_' {
76                continue;
77            }
78
79            // We don't actually need the base here: we already know this main
80            // part only contains digits valid for the specified base.
81            let digit = hex_digit_value(digit)
82                .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
83
84            acc = acc.checked_mul(base)?;
85            acc = acc.checked_add(N::from_small_number(digit))?;
86        }
87
88        Some(acc)
89    }
90
91    /// The base of this integer literal.
92    pub fn base(&self) -> IntegerBase {
93        self.base
94    }
95
96    /// The main part containing the digits and potentially `_`. Do not try to
97    /// parse this directly as that would ignore the base!
98    pub fn raw_main_part(&self) -> &str {
99        &(*self.raw)[self.start_main_part..self.end_main_part]
100    }
101
102    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
103    ///
104    /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`.
105    pub fn suffix(&self) -> &str {
106        &(*self.raw)[self.end_main_part..]
107    }
108
109    /// Returns the raw input that was passed to `parse`.
110    pub fn raw_input(&self) -> &str {
111        &self.raw
112    }
113
114    /// Returns the raw input that was passed to `parse`, potentially owned.
115    pub fn into_raw_input(self) -> B {
116        self.raw
117    }
118}
119
120impl IntegerLit<&str> {
121    /// Makes a copy of the underlying buffer and returns the owned version of
122    /// `Self`.
123    pub fn to_owned(&self) -> IntegerLit<String> {
124        IntegerLit {
125            raw: self.raw.to_owned(),
126            start_main_part: self.start_main_part,
127            end_main_part: self.end_main_part,
128            base: self.base,
129        }
130    }
131}
132
133impl<B: Buffer> fmt::Display for IntegerLit<B> {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        write!(f, "{}", &*self.raw)
136    }
137}
138
/// Integer literal types. *Implementation detail*.
///
/// Implemented for all primitive integer types (`u8` to `u128`, `i8` to
/// `i128`, `usize` and `isize`). This trait is sealed and cannot be
/// implemented outside of this crate. The trait's methods are implementation
/// detail of this library and are not subject to semver.
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
    /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
    #[doc(hidden)]
    fn from_small_number(n: u8) -> Self;

    /// Adds `rhs` to `self`, returning `None` instead of overflowing.
    #[doc(hidden)]
    fn checked_add(self, rhs: Self) -> Option<Self>;

    /// Multiplies `self` by `rhs`, returning `None` instead of overflowing.
    #[doc(hidden)]
    fn checked_mul(self, rhs: Self) -> Option<Self>;

    /// Returns the [`IntegerType`] variant that corresponds to `Self`.
    #[doc(hidden)]
    fn ty() -> IntegerType;
}
158
159macro_rules! impl_from_int_literal {
160    ($( $ty:ty => $variant:ident ,)* ) => {
161        $(
162            impl self::sealed::Sealed for $ty {}
163            impl FromIntegerLiteral for $ty {
164                fn from_small_number(n: u8) -> Self {
165                    n as Self
166                }
167                fn checked_add(self, rhs: Self) -> Option<Self> {
168                    self.checked_add(rhs)
169                }
170                fn checked_mul(self, rhs: Self) -> Option<Self> {
171                    self.checked_mul(rhs)
172                }
173                fn ty() -> IntegerType {
174                    IntegerType::$variant
175                }
176            }
177        )*
178    };
179}
180
181impl_from_int_literal!(
182    u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
183    i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
184);
185
/// Private module holding the supertrait used to seal `FromIntegerLiteral`.
/// The module itself is not public, which is what keeps `Sealed` from being
/// named (and thus implemented) by other crates.
mod sealed {
    /// Marker supertrait of `FromIntegerLiteral`; has no methods.
    pub trait Sealed {}
}
189
190/// Precondition: first byte of string has to be in `b'0'..=b'9'`.
191#[inline(never)]
192pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> {
193    // Figure out base and strip prefix base, if it exists.
194    let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
195        (b'0', Some(b'b')) => (2, IntegerBase::Binary),
196        (b'0', Some(b'o')) => (2, IntegerBase::Octal),
197        (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
198
199        // Everything else is treated as decimal. Several cases are caught
200        // by this:
201        // - "123"
202        // - "0"
203        // - "0u8"
204        // - "0r" -> this will error later
205        _ => (0, IntegerBase::Decimal),
206    };
207    let without_prefix = &input[end_prefix..];
208
209
210    // Scan input to find the first character that's not a valid digit.
211    let is_valid_digit = match base {
212        IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'),
213        IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'),
214        IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'),
215        IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'),
216    };
217    let end_main = without_prefix.bytes()
218        .position(|b| !is_valid_digit(b))
219        .unwrap_or(without_prefix.len());
220    let (main_part, suffix) = without_prefix.split_at(end_main);
221
222    check_suffix(suffix).map_err(|kind| {
223        // This is just to have a nicer error kind for this special case. If the
224        // suffix is invalid, it is non-empty -> unwrap ok.
225        let first = suffix.as_bytes()[0];
226        if !is_valid_digit(first) && first.is_ascii_digit() {
227            perr(end_main + end_prefix, InvalidDigit)
228        } else {
229            perr(end_main + end_prefix..input.len(), kind)
230        }
231    })?;
232    if suffix.starts_with('e') || suffix.starts_with('E') {
233        return Err(perr(end_main, IntegerSuffixStartingWithE));
234    }
235
236    // Make sure main number part is not empty.
237    if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
238        return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
239    }
240
241    Ok(IntegerLit {
242        raw: input,
243        start_main_part: end_prefix,
244        end_main_part: end_main + end_prefix,
245        base,
246    })
247}
248
249
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, written with the prefix `0b`.
    Binary,
    /// Base 8, written with the prefix `0o`.
    Octal,
    /// Base 10, written without any prefix.
    Decimal,
    /// Base 16, written with the prefix `0x`.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the prefix a literal of this base starts with: `"0b"`, `"0o"`
    /// or `"0x"`, and the empty string for decimal.
    pub fn prefix(self) -> &'static str {
        match self {
            IntegerBase::Decimal => "",
            IntegerBase::Binary => "0b",
            IntegerBase::Octal => "0o",
            IntegerBase::Hexadecimal => "0x",
        }
    }

    /// Returns the numeric radix of this base, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            IntegerBase::Decimal => 10,
            IntegerBase::Binary => 2,
            IntegerBase::Octal => 8,
            IntegerBase::Hexadecimal => 16,
        }
    }
}
281
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Looks up the type denoted by `suffix` (e.g. `"u8"` yields
    /// `Self::U8`). Returns `None` if `suffix` is not exactly one of the
    /// integer type suffixes.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        // Every variant; the lookup below compares against `Self::suffix`, so
        // both directions of the mapping share one table of spellings.
        const ALL: [IntegerType; 12] = [
            IntegerType::U8,
            IntegerType::U16,
            IntegerType::U32,
            IntegerType::U64,
            IntegerType::U128,
            IntegerType::Usize,
            IntegerType::I8,
            IntegerType::I16,
            IntegerType::I32,
            IntegerType::I64,
            IntegerType::I128,
            IntegerType::Isize,
        ];

        ALL.iter().copied().find(|ty| ty.suffix() == suffix)
    }

    /// Returns the suffix that denotes this type, e.g. `"u8"` for `Self::U8`.
    pub fn suffix(self) -> &'static str {
        match self {
            // Unsigned types.
            Self::U8 => "u8",
            Self::U16 => "u16",
            Self::U32 => "u32",
            Self::U64 => "u64",
            Self::U128 => "u128",
            Self::Usize => "usize",

            // Signed types.
            Self::I8 => "i8",
            Self::I16 => "i16",
            Self::I32 => "i32",
            Self::I64 => "i64",
            Self::I128 => "i128",
            Self::Isize => "isize",
        }
    }
}
340
341impl FromStr for IntegerType {
342    type Err = ();
343    fn from_str(s: &str) -> Result<Self, Self::Err> {
344        Self::from_suffix(s).ok_or(())
345    }
346}
347
348impl fmt::Display for IntegerType {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        self.suffix().fmt(f)
351    }
352}
353
354
355#[cfg(test)]
356mod tests;