uuid/
parser.rs

1// Copyright 2013-2014 The Rust Project Developers.
2// Copyright 2018 The Uuid Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! [`Uuid`] parsing constructs and utilities.
13//!
14//! [`Uuid`]: ../struct.Uuid.html
15
16use crate::{
17    error::*,
18    std::{convert::TryFrom, str},
19    Uuid,
20};
21
22impl str::FromStr for Uuid {
23    type Err = Error;
24
25    fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
26        Uuid::parse_str(uuid_str)
27    }
28}
29
30impl TryFrom<&'_ str> for Uuid {
31    type Error = Error;
32
33    fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
34        Uuid::parse_str(uuid_str)
35    }
36}
37
38impl Uuid {
39    /// Parses a `Uuid` from a string of hexadecimal digits with optional
40    /// hyphens.
41    ///
42    /// Any of the formats generated by this module (simple, hyphenated, urn,
43    /// Microsoft GUID) are supported by this parsing function.
44    ///
45    /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
46    /// This method will be eventually deprecated in favor of `try_parse`.
47    ///
48    /// # Examples
49    ///
50    /// Parse a hyphenated UUID:
51    ///
52    /// ```
53    /// # use uuid::{Uuid, Version, Variant};
54    /// # fn main() -> Result<(), uuid::Error> {
55    /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
56    ///
57    /// assert_eq!(Some(Version::Random), uuid.get_version());
58    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
59    /// # Ok(())
60    /// # }
61    /// ```
62    ///
63    /// [`try_parse`]: #method.try_parse
64    pub fn parse_str(input: &str) -> Result<Uuid, Error> {
65        try_parse(input.as_bytes())
66            .map(Uuid::from_bytes)
67            .map_err(InvalidUuid::into_err)
68    }
69
70    /// Parses a `Uuid` from a string of hexadecimal digits with optional
71    /// hyphens.
72    ///
73    /// This function is similar to [`parse_str`], in fact `parse_str` shares
74    /// the same underlying parser. The difference is that if `try_parse`
75    /// fails, it won't generate very useful error messages. The `parse_str`
76    /// function will eventually be deprecated in favor of `try_parse`.
77    ///
78    /// To parse a UUID from a byte stream instead of a UTF8 string, see
79    /// [`try_parse_ascii`].
80    ///
81    /// # Examples
82    ///
83    /// Parse a hyphenated UUID:
84    ///
85    /// ```
86    /// # use uuid::{Uuid, Version, Variant};
87    /// # fn main() -> Result<(), uuid::Error> {
88    /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
89    ///
90    /// assert_eq!(Some(Version::Random), uuid.get_version());
91    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
92    /// # Ok(())
93    /// # }
94    /// ```
95    ///
96    /// [`parse_str`]: #method.parse_str
97    /// [`try_parse_ascii`]: #method.try_parse_ascii
98    pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
99        Self::try_parse_ascii(input.as_bytes())
100    }
101
102    /// Parses a `Uuid` from a string of hexadecimal digits with optional
103    /// hyphens.
104    ///
105    /// The input is expected to be a string of ASCII characters. This method
106    /// can be more convenient than [`try_parse`] if the UUID is being
107    /// parsed from a byte stream instead of from a UTF8 string.
108    ///
109    /// # Examples
110    ///
111    /// Parse a hyphenated UUID:
112    ///
113    /// ```
114    /// # use uuid::{Uuid, Version, Variant};
115    /// # fn main() -> Result<(), uuid::Error> {
116    /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
117    ///
118    /// assert_eq!(Some(Version::Random), uuid.get_version());
119    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
120    /// # Ok(())
121    /// # }
122    /// ```
123    ///
124    /// [`try_parse`]: #method.try_parse
125    pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
126        match try_parse(input) {
127            Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
128            // If parsing fails then we don't know exactly what went wrong
129            // In this case, we just return a generic error
130            Err(_) => Err(Error(ErrorKind::Other)),
131        }
132    }
133}
134
135const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
136    match (input.len(), input) {
137        // Inputs of 32 bytes must be a non-hyphenated UUID
138        (32, s) => parse_simple(s),
139        // Hyphenated UUIDs may be wrapped in various ways:
140        // - `{UUID}` for braced UUIDs
141        // - `urn:uuid:UUID` for URNs
142        // - `UUID` for a regular hyphenated UUID
143        (36, s)
144        | (38, [b'{', s @ .., b'}'])
145        | (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) => {
146            parse_hyphenated(s)
147        }
148        // Any other shaped input is immediately invalid
149        _ => Err(InvalidUuid(input)),
150    }
151}
152
153#[inline]
154#[allow(dead_code)]
155pub(crate) const fn parse_braced(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
156    if let (38, [b'{', s @ .., b'}']) = (input.len(), input) {
157        parse_hyphenated(s)
158    } else {
159        Err(InvalidUuid(input))
160    }
161}
162
163#[inline]
164#[allow(dead_code)]
165pub(crate) const fn parse_urn(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
166    if let (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) =
167        (input.len(), input)
168    {
169        parse_hyphenated(s)
170    } else {
171        Err(InvalidUuid(input))
172    }
173}
174
175#[inline]
176pub(crate) const fn parse_simple(s: &[u8]) -> Result<[u8; 16], InvalidUuid> {
177    // This length check here removes all other bounds
178    // checks in this function
179    if s.len() != 32 {
180        return Err(InvalidUuid(s));
181    }
182
183    let mut buf: [u8; 16] = [0; 16];
184    let mut i = 0;
185
186    while i < 16 {
187        // Convert a two-char hex value (like `A8`)
188        // into a byte (like `10101000`)
189        let h1 = HEX_TABLE[s[i * 2] as usize];
190        let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
191
192        // We use `0xff` as a sentinel value to indicate
193        // an invalid hex character sequence (like the letter `G`)
194        if h1 | h2 == 0xff {
195            return Err(InvalidUuid(s));
196        }
197
198        // The upper nibble needs to be shifted into position
199        // to produce the final byte value
200        buf[i] = SHL4_TABLE[h1 as usize] | h2;
201        i += 1;
202    }
203
204    Ok(buf)
205}
206
207#[inline]
208const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], InvalidUuid> {
209    // This length check here removes all other bounds
210    // checks in this function
211    if s.len() != 36 {
212        return Err(InvalidUuid(s));
213    }
214
215    // We look at two hex-encoded values (4 chars) at a time because
216    // that's the size of the smallest group in a hyphenated UUID.
217    // The indexes we're interested in are:
218    //
219    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
220    //            |   |   ||   ||   ||   ||   |   |
221    // hyphens  : |   |   8|  13|  18|  23|   |   |
222    // positions: 0   4    9   14   19   24  28  32
223
224    // First, ensure the hyphens appear in the right places
225    match [s[8], s[13], s[18], s[23]] {
226        [b'-', b'-', b'-', b'-'] => {}
227        _ => return Err(InvalidUuid(s)),
228    }
229
230    let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
231    let mut buf: [u8; 16] = [0; 16];
232    let mut j = 0;
233
234    while j < 8 {
235        let i = positions[j];
236
237        // The decoding here is the same as the simple case
238        // We're just dealing with two values instead of one
239        let h1 = HEX_TABLE[s[i as usize] as usize];
240        let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
241        let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
242        let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
243
244        if h1 | h2 | h3 | h4 == 0xff {
245            return Err(InvalidUuid(s));
246        }
247
248        buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
249        buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
250        j += 1;
251    }
252
253    Ok(buf)
254}
255
/// Lookup table from a byte to the value of the hexadecimal digit it
/// encodes in ASCII (`0..=15`, upper or lower case), or `0xff` when the
/// byte is not a hexadecimal digit.
const HEX_TABLE: &[u8; 256] = &{
    // Every byte starts out marked as "not a hex digit"
    let mut table = [0xff_u8; 256];

    // `0`-`9` map to 0-9
    let mut digit = 0;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // `a`-`f` and `A`-`F` both map to 10-15
    let mut letter = 0;
    while letter < 6 {
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
275
/// Lookup table from a byte to that byte shifted left by four bits, with
/// the bits shifted out of the top discarded.
///
/// The parsers use it to move a decoded hex digit into the upper nibble
/// of an output byte.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut value: usize = 0;

    while value < 256 {
        // A shift by 4 on a `u8` simply drops the upper nibble
        table[value] = (value as u8) << 4;
        value += 1;
    }

    table
};
290
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{std::string::ToString, tests::new};

    /// Asserts that `Uuid::parse_str(input)` fails with exactly `expected`.
    fn assert_parse_err(input: &str, expected: ErrorKind) {
        assert_eq!(Uuid::parse_str(input), Err(Error(expected)));
    }

    #[test]
    fn test_parse_uuid_v4_valid() {
        // The same UUID written in every supported format must parse to
        // the same value
        let reference = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let same_uuid = [
            "67e5504410b1426f9247bb680e5fe0c8",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
            "{67e55044-10b1-426f-9247-bb680e5fe0c8}",
        ];

        for text in same_uuid.iter() {
            assert_eq!(reference, Uuid::parse_str(text).unwrap());
        }

        let well_formed = [
            "00000000000000000000000000000000",
            "67e55044-10b1-426f-9247-bb680e5fe0c8",
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4",
            "67e5504410b1426f9247bb680e5fe0c8",
            "01020304-1112-2122-3132-414243444546",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
            "{6d93bade-bd9f-4e13-8914-9474e1e3567b}",
        ];

        for text in well_formed.iter() {
            assert!(Uuid::parse_str(text).is_ok());
        }

        // Nil
        let nil = Uuid::nil();
        let nil_texts = [
            "00000000000000000000000000000000",
            "00000000-0000-0000-0000-000000000000",
        ];

        for text in nil_texts.iter() {
            assert_eq!(Uuid::parse_str(text).unwrap(), nil);
        }
    }

    #[test]
    fn test_parse_uuid_v4_invalid() {
        // Invalid
        assert_parse_err("", ErrorKind::SimpleLength { len: 0 });

        assert_parse_err(
            "!",
            ErrorKind::Char {
                character: '!',
                index: 1,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45",
            ErrorKind::GroupLength {
                group: 4,
                len: 13,
                index: 25,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-BBF-329BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 3,
                index: 20,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4",
            ErrorKind::Char {
                character: 'G',
                index: 21,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 4 },
        );

        assert_parse_err("F9168C5E-CEB2-4faa", ErrorKind::GroupCount { count: 3 });

        assert_parse_err(
            "F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4",
            ErrorKind::Char {
                character: 'X',
                index: 19,
            },
        );

        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41",
            ErrorKind::Char {
                character: '{',
                index: 1,
            },
        );

        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4",
            ErrorKind::GroupLength {
                group: 1,
                len: 3,
                index: 10,
            },
        );

        assert_parse_err(
            "01020304-1112-2122-3132-41424344",
            ErrorKind::GroupLength {
                group: 4,
                len: 8,
                index: 25,
            },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c88",
            ErrorKind::SimpleLength { len: 33 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0cg8",
            ErrorKind::Char {
                character: 'g',
                index: 32,
            },
        );

        assert_parse_err(
            "67e5504410b1426%9247bb680e5fe0c8",
            ErrorKind::Char {
                character: '%',
                index: 16,
            },
        );

        assert_parse_err(
            "231231212212423424324323477343246663",
            ErrorKind::SimpleLength { len: 36 },
        );

        assert_parse_err(
            "{00000000000000000000000000000000}",
            ErrorKind::GroupCount { count: 1 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_parse_err(
            "67e550X410b1426f9247bb680e5fe0cd",
            ErrorKind::Char {
                character: 'X',
                index: 7,
            },
        );

        assert_parse_err(
            "67e550-4105b1426f9247bb680e5fe0c",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 5,
                index: 20,
            },
        );

        // A single multi-byte character is reported as a character, not
        // as individual bytes
        assert_parse_err(
            "\u{bcf3c}",
            ErrorKind::Char {
                character: '\u{bcf3c}',
                index: 1,
            },
        );
    }

    #[test]
    fn test_roundtrip_default() {
        let original = new();
        let text = original.to_string();

        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        let original = new();
        let text = original.hyphenated().to_string();

        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_simple() {
        let original = new();
        let text = original.simple().to_string();

        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_urn() {
        let original = new();
        let text = original.urn().to_string();

        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_braced() {
        let original = new();
        let text = original.braced().to_string();

        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_parse_urn() {
        let original = new();
        let text = original.urn().to_string();
        let bytes = parse_urn(text.as_bytes()).unwrap();

        assert_eq!(original, Uuid::from_bytes(bytes));
    }

    #[test]
    fn test_roundtrip_parse_braced() {
        let original = new();
        let text = original.braced().to_string();
        let bytes = parse_braced(text.as_bytes()).unwrap();

        assert_eq!(original, Uuid::from_bytes(bytes));
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // A NUL byte in an otherwise well-shaped hyphenated UUID
        let input = b"67e55044-10b1-426f-9247-bb680e5\0e0c8";

        assert!(Uuid::try_parse_ascii(input).is_err());
    }
}