Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
eb5dd55
Fix clippy issues for rust 1.67 (#4478)
DimitrisJim Jan 31, 2023
1d0cb29
feat: allow not set SIGINT handler
discord9 Jan 6, 2023
844ff75
feat: impl Detail option for not set SIG_INT
discord9 Jan 31, 2023
3a08ff1
use qualname in `TypeErrors` for functions (#4476)
nicku12345 Feb 1, 2023
29e322b
Match on ascii start/continuation characters before calling functions.
DimitrisJim Jan 29, 2023
516c41d
Don't call is_emoji_presentation for each invocation of consume_normal
DimitrisJim Jan 29, 2023
a3eb988
Add initial capacities, use u32s for indents/spaces.
DimitrisJim Jan 29, 2023
6ba8191
Eat for comma.
DimitrisJim Jan 30, 2023
4c37307
Hint that the unwrap should always succeed.
DimitrisJim Feb 1, 2023
2cb6634
use workspace dependencies
youknowone Feb 6, 2023
2ce349d
Bump openssl-src from 111.24.0+1.1.1s to 111.25.0+1.1.1t
dependabot[bot] Feb 8, 2023
01380bf
Move NewLineHandler inline, don't check each character twice.
DimitrisJim Feb 6, 2023
8ef74d6
Document lexer.
DimitrisJim Feb 7, 2023
0d3ff4d
Try to fix mac build
youknowone Jan 22, 2023
c682063
update libffi
youknowone Jan 22, 2023
1f9a48f
bump up openssl and libffi
youknowone Jan 22, 2023
600a3da
skip run rust tests for macOS CI
youknowone Feb 9, 2023
585d8f2
Simplify examples/call_between_rust_and_python
youknowone Feb 9, 2023
a7ebb80
Skip linking ssl on mac runner.
DimitrisJim Feb 9, 2023
4c96416
Use entire range for generators-as-arguments
charliermarsh Feb 10, 2023
c1defc1
Add test_generator_expression_argument
youknowone Feb 10, 2023
37b4e97
Refactor: Join string and string_parser.
DimitrisJim Feb 11, 2023
deca153
Document parser crate.
DimitrisJim Feb 7, 2023
595897c
fix the typos
howjmay Feb 12, 2023
e53e891
Add tests, some comments, to function.rs.
DimitrisJim Feb 11, 2023
9fc54fd
extra_tests/snippets/{builtins => builtin_eval}.py
youknowone Feb 13, 2023
a8d63ff
remove duplicated tests from tests/stdlib_math.py
youknowone Feb 13, 2023
1b6d45d
Remove completed TODO
youknowone Feb 13, 2023
6871964
wrap_index without abs
youknowone Jun 10, 2022
6731150
optimize str.(l|r)strip
youknowone Feb 13, 2023
0c7324e
Fix aarch64 compatibility for sqlite.
jonathanslenders Feb 13, 2023
c45bcfa
Fix str.join with str subclass
youknowone Feb 13, 2023
96178a1
Add co_freevars to code object
minhrongcon2000 Feb 16, 2023
9ac6e99
Fix code linting
minhrongcon2000 Feb 16, 2023
c5a629b
Add co_cellvars to code object
howjmay Feb 15, 2023
1755f04
Fix unexpected success in test_future.
DimitrisJim Feb 16, 2023
390c8da
Use nix for more things
coolreader18 Feb 9, 2023
8bac26f
Tidy up ssl a little
coolreader18 Feb 16, 2023
39ddc50
Run cargo-update
coolreader18 Feb 16, 2023
59c7536
PyObjectRef::downcast_exact returns PyRefExact
youknowone Feb 14, 2023
c97d504
Add description for PyRefExact
youknowone Feb 16, 2023
ef5ba76
Optimize Py<PyDict>::to_attributes() to reuse PyRefExact
xiaozhiyan Feb 16, 2023
b1f41c1
Optimize bytes-like (l|r)strip (#4500)
dannasman Feb 17, 2023
900f49a
narrow publicity of BytesInner::elements
youknowone Feb 17, 2023
ad0b15e
remove unnecessary to_vec()
dannasman Feb 17, 2023
5b17bd6
Handle panic in case of unsupported format in chrono
itsankitkp Jan 27, 2023
d4a6d39
feat! add test case for unsupported time format
itsankitkp Jan 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Document parser crate.
  • Loading branch information
DimitrisJim authored and itsankitkp committed Feb 19, 2023
commit deca1538b565a34d060c3c351c2a2087c84184f3
2 changes: 1 addition & 1 deletion compiler/parser/src/context.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use rustpython_ast::{Expr, ExprContext, ExprKind};

pub fn set_context(expr: Expr, ctx: ExprContext) -> Expr {
pub(crate) fn set_context(expr: Expr, ctx: ExprContext) -> Expr {
match expr.node {
ExprKind::Name { id, .. } => Expr {
node: ExprKind::Name { id, ctx },
Expand Down
66 changes: 60 additions & 6 deletions compiler/parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,71 @@
//! Define internal parse error types
//! The goal is to provide a matching and a safe error API, maksing errors from LALR
//! Error types for the parser.
//!
//! These types are used to represent errors that occur during lexing and parsing and are
//! returned by the `parse_*` functions in the [parser] module and the iterator in the
//! [lexer] implementation.
//!
//! [parser]: crate::parser
//! [lexer]: crate::lexer

// Define internal parse error types.
// The goal is to provide a matching and a safe error API, masking errors from LALR
use crate::{ast::Location, token::Tok};
use lalrpop_util::ParseError as LalrpopError;
use std::fmt;

/// Represents an error during lexing.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
    /// The type of error that occurred.
    pub error: LexicalErrorType,
    /// The location in the source where the error occurred.
    pub location: Location,
}

impl LexicalError {
/// Creates a new `LexicalError` with the given error type and location.
pub fn new(error: LexicalErrorType, location: Location) -> Self {
Self { error, location }
}
}

/// Represents the different types of errors that can occur during lexing.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
    // TODO: Can probably be removed, the places it is used seem to be able
    // to use the `UnicodeError` variant instead.
    #[doc(hidden)]
    StringError,
    // TODO: Should take a start/end position to report.
    /// Decoding of a unicode escape sequence in a string literal failed.
    UnicodeError,
    /// The nesting of brackets/braces/parentheses is not balanced.
    NestingError,
    /// The indentation is not consistent.
    IndentationError,
    /// Inconsistent use of tabs and spaces.
    TabError,
    /// Encountered a tab after a space.
    TabsAfterSpaces,
    /// A non-default argument follows a default argument.
    DefaultArgumentError,
    /// A duplicate argument was found in a function definition.
    /// The payload is presumably the offending argument's name — verify against the lexer.
    DuplicateArgumentError(String),
    /// A positional argument follows a keyword argument.
    PositionalArgumentError,
    /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
    UnpackedArgumentError,
    /// A keyword argument was repeated.
    DuplicateKeywordArgumentError(String),
    /// An unrecognized token was encountered.
    UnrecognizedToken { tok: char },
    /// An f-string error containing the [`FStringErrorType`].
    FStringError(FStringErrorType),
    /// An unexpected character was encountered after a line continuation.
    LineContinuationError,
    /// An unexpected end of file was encountered.
    Eof,
    /// An unexpected error occurred.
    OtherError(String),
}

Expand Down Expand Up @@ -85,13 +116,17 @@ impl fmt::Display for LexicalErrorType {
}

// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub struct FStringError {
    /// The type of error that occurred.
    pub error: FStringErrorType,
    /// The location of the error in the source.
    pub location: Location,
}

impl FStringError {
/// Creates a new `FStringError` with the given error type and location.
pub fn new(error: FStringErrorType, location: Location) -> Self {
Self { error, location }
}
Expand All @@ -106,19 +141,33 @@ impl From<FStringError> for LexicalError {
}
}

/// Represents the different types of errors that can occur during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
    /// Expected a right brace after an opened left brace.
    UnclosedLbrace,
    /// Expected a left brace before an unmatched right brace.
    UnopenedRbrace,
    /// Expected a right brace after a conversion flag.
    ExpectedRbrace,
    /// An error occurred while parsing an f-string expression.
    InvalidExpression(Box<ParseErrorType>),
    /// An invalid conversion flag was encountered.
    InvalidConversionFlag,
    /// An empty expression was encountered.
    EmptyExpression,
    /// An opening delimiter was not closed properly; holds the opening and
    /// the mismatching closing character.
    MismatchedDelimiter(char, char),
    /// Too many nested expressions in an f-string.
    ExpressionNestedTooDeeply,
    /// The f-string expression cannot include the given character.
    ExpressionCannotInclude(char),
    /// A single right brace was encountered.
    SingleRbrace,
    /// A closing delimiter was not opened properly.
    Unmatched(char),
    // TODO: Test this case.
    /// Unterminated string.
    UnterminatedString,
}

Expand Down Expand Up @@ -167,9 +216,10 @@ impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
}
}

/// Represents an error during parsing.
///
/// A [`ParseErrorType`] wrapped in the compiler core's common `BaseError` type.
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;

/// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq, thiserror::Error)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
Expand All @@ -180,11 +230,12 @@ pub enum ParseErrorType {
InvalidToken,
/// Parser encountered an unexpected token
UnrecognizedToken(Tok, Option<String>),
/// Maps to `User` type from `lalrpop-util`
// Maps to `User` type from `lalrpop-util`
/// Parser encountered an error during lexing.
Lexical(LexicalErrorType),
}

/// Convert `lalrpop_util::ParseError` to our internal type
// Convert `lalrpop_util::ParseError` to our internal type
pub(crate) fn parse_error_from_lalrpop(
err: LalrpopError<Location, Tok, LexicalError>,
source_path: &str,
Expand Down Expand Up @@ -258,6 +309,7 @@ impl fmt::Display for ParseErrorType {
}

impl ParseErrorType {
/// Returns true if the error is an indentation error.
pub fn is_indentation_error(&self) -> bool {
match self {
ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
Expand All @@ -267,6 +319,8 @@ impl ParseErrorType {
_ => false,
}
}

/// Returns true if the error is a tab error.
pub fn is_tab_error(&self) -> bool {
matches!(
self,
Expand Down
118 changes: 109 additions & 9 deletions compiler/parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,119 @@
//! This crate can be used to parse python sourcecode into a so
//! called AST (abstract syntax tree).
//! This crate can be used to parse Python source code into an Abstract
//! Syntax Tree.
//!
//! The stages involved in this process are lexical analysis and
//! parsing. The lexical analysis splits the sourcecode into
//! tokens, and the parsing transforms those tokens into an AST.
//! ## Overview:
//!
//! For example, one could do this:
//! The process by which source code is parsed into an AST can be broken down
//! into two general stages: [lexical analysis] and [parsing].
//!
//! During lexical analysis, the source code is converted into a stream of lexical
//! tokens that represent the smallest meaningful units of the language. For example,
//! the source code `print("Hello world")` would _roughly_ be converted into the following
//! stream of tokens:
//!
//! ```text
//! Name("print"), LeftParen, String("Hello world"), RightParen
//! ```
//! use rustpython_parser::{parser, ast};
//!
//! let python_source = "print('Hello world')";
//! let python_ast = parser::parse_expression(python_source, "<embedded>").unwrap();
//! These tokens are then consumed by the parser, which matches them against a set of
//! grammar rules to verify that the source code is syntactically valid and to construct
//! an AST that represents the source code.
//!
//! During parsing, the parser consumes the tokens generated by the lexer and constructs
//! a tree representation of the source code. The tree is made up of nodes that represent
//! the different syntactic constructs of the language. If the source code is syntactically
//! invalid, parsing fails and an error is returned. After a successful parse, the AST can
//! be used to perform further analysis on the source code. Continuing with the example
//! above, the AST generated by the parser would _roughly_ look something like this:
//!
//! ```text
//! node: Expr {
//! value: {
//! node: Call {
//! func: {
//! node: Name {
//! id: "print",
//! ctx: Load,
//! },
//! },
//! args: [
//! node: Constant {
//! value: Str("Hello World"),
//! kind: None,
//! },
//! ],
//! keywords: [],
//! },
//! },
//! },
//!```
//!
//! Note: The Tokens/ASTs shown above are not the exact tokens/ASTs generated by the parser.
//!
//! ## Source code layout:
//!
//! The functionality of this crate is split into several modules:
//!
//! - [token]: This module contains the definition of the tokens that are generated by the lexer.
//! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST.
//! - Functions and strings have special parsing requirements that are handled in additional files.
//! - [mode]: This module contains the definition of the different modes that the parser can be in.
//! - [error]: This module contains the definition of the errors that can be returned by the parser.
//!
//! # Examples
//!
//! For example, to get a stream of tokens from a given string, one could do this:
//!
//! ```
//! use rustpython_parser::lexer::make_tokenizer;
//!
//! let python_source = r#"
//! def is_odd(i):
//! return bool(i & 1)
//! "#;
//! let mut tokens = make_tokenizer(python_source);
//! assert!(tokens.all(|t| t.is_ok()));
//! ```
//!
//! These tokens can be directly fed into the parser to generate an AST:
//!
//! ```
//! use rustpython_parser::parser::{parse_tokens, Mode};
//! use rustpython_parser::lexer::make_tokenizer;
//!
//! let python_source = r#"
//! def is_odd(i):
//! return bool(i & 1)
//! "#;
//! let tokens = make_tokenizer(python_source);
//! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
//!
//! assert!(ast.is_ok());
//! ```
//!
//! Alternatively, you can use one of the other `parse_*` functions to parse a string directly without using a specific
//! mode or tokenizing the source beforehand:
//!
//! ```
//! use rustpython_parser::parser::parse_program;
//!
//! let python_source = r#"
//! def is_odd(i):
//! return bool(i & 1)
//! "#;
//! let ast = parse_program(python_source, "<embedded>");
//!
//! assert!(ast.is_ok());
//! ```
//!
//! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
//! [parsing]: https://en.wikipedia.org/wiki/Parsing
//! [token]: crate::token
//! [lexer]: crate::lexer
//! [parser]: crate::parser
//! [mode]: crate::mode
//! [error]: crate::error

#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
Expand Down
6 changes: 6 additions & 0 deletions compiler/parser/src/mode.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
//! Control over the different modes by which a source file can be parsed.
use crate::token::Tok;

/// The mode argument specifies in what way code must be parsed.
#[derive(Clone, Copy)]
pub enum Mode {
    /// The code consists of a sequence of statements.
    Module,
    /// The code consists of a sequence of interactive statements.
    Interactive,
    /// The code consists of a single expression.
    Expression,
}

Expand Down Expand Up @@ -39,6 +44,7 @@ impl std::str::FromStr for Mode {
}
}

/// Returned when a given mode is not valid.
#[derive(Debug)]
pub struct ModeParseError {
_priv: (),
Expand Down
Loading