Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 42 additions & 15 deletions examples/parse_folder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ extern crate log;
use clap::{App, Arg};

use rustpython_parser::{ast, parser};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::path::Path;
use std::time::{Duration, Instant};

fn main() {
env_logger::init();
Expand Down Expand Up @@ -61,30 +61,45 @@ fn parse_folder(path: &Path) -> std::io::Result<Vec<ParsedFile>> {
}

if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") {
let result = parse_python_file(&path);
match &result {
let parsed_file = parse_python_file(&path);
match &parsed_file.result {
Ok(_) => {}
Err(y) => error!("Erreur in file {:?} {:?}", path, y),
}
res.push(ParsedFile {
filename: Box::new(path),
result,
});

res.push(parsed_file);
}
}
Ok(res)
}

fn parse_python_file(filename: &Path) -> ParseResult {
fn parse_python_file(filename: &Path) -> ParsedFile {
info!("Parsing file {:?}", filename);
let source = std::fs::read_to_string(filename).map_err(|e| e.to_string())?;
parser::parse_program(&source).map_err(|e| e.to_string())
match std::fs::read_to_string(filename) {
Err(e) => ParsedFile {
// filename: Box::new(filename.to_path_buf()),
// code: "".to_string(),
num_lines: 0,
result: Err(e.to_string()),
},
Ok(source) => {
let num_lines = source.to_string().lines().count();
let result = parser::parse_program(&source).map_err(|e| e.to_string());
ParsedFile {
// filename: Box::new(filename.to_path_buf()),
// code: source.to_string(),
num_lines,
result,
}
}
}
}

fn statistics(results: ScanResult) {
// println!("Processed {:?} files", res.len());
println!("Scanned a total of {} files", results.parsed_files.len());
let total = results.parsed_files.len();
let total: usize = results.parsed_files.len();
let total_lines: usize = results.parsed_files.iter().map(|p| p.num_lines).sum();
let failed = results
.parsed_files
.iter()
Expand All @@ -103,19 +118,31 @@ fn statistics(results: ScanResult) {
let duration = results.t2 - results.t1;
println!("Total time spend: {:?}", duration);
println!(
"File processing rate: {} files/second",
(total * 1_000_000) as f64 / duration.as_micros() as f64
"Processed {} files. That's {} files/second",
total,
rate(total, duration)
);
println!(
"Processed {} lines of python code. That's {} lines/second",
total_lines,
rate(total_lines, duration)
);
}

/// Throughput in items per second for `counter` items processed in `duration`.
fn rate(counter: usize, duration: Duration) -> f64 {
    // Scale in floating point so a very large counter cannot overflow the
    // intermediate `usize` product (the old `counter * 1_000_000` could).
    // A zero-length duration yields `inf`, which prints harmlessly.
    counter as f64 * 1_000_000.0 / duration.as_micros() as f64
}

// Aggregate outcome of scanning a folder tree, bracketed by timing marks.
struct ScanResult {
    // Instant captured just before the scan started.
    t1: Instant,
    // Instant captured just after the scan finished.
    t2: Instant,
    // One entry per `.py` file encountered during the walk.
    parsed_files: Vec<ParsedFile>,
}

// Per-file result of a parse attempt.
// NOTE(review): the commented-out fields below look like deliberately
// disabled bookkeeping from this change — confirm before deleting.
struct ParsedFile {
    // filename: Box<PathBuf>,
    // code: String,
    // Number of source lines in the file (0 when reading the file failed).
    num_lines: usize,
    // Ok on a successful parse; Err carries the I/O or parse error text.
    result: ParseResult,
}

Expand Down
107 changes: 56 additions & 51 deletions parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,18 +340,7 @@ where

/// Lex a hex/octal/decimal/binary number without a decimal point.
fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> LexResult {
let mut value_text = String::new();

loop {
if let Some(c) = self.take_number(radix) {
value_text.push(c);
} else if self.chr0 == Some('_') {
self.next_char();
} else {
break;
}
}

let value_text = self.radix_run(radix);
let end_pos = self.get_pos();
let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError {
error: LexicalErrorType::OtherError(format!("{:?}", e)),
Expand All @@ -360,24 +349,19 @@ where
Ok((start_pos, Tok::Int { value }, end_pos))
}

/// Lex a normal number, that is, no octal, hex or binary number.
fn lex_normal_number(&mut self) -> LexResult {
let start_pos = self.get_pos();

let mut value_text = String::new();

// Normal number:
while let Some(c) = self.take_number(10) {
value_text.push(c);
}
let mut value_text = self.radix_run(10);

// If float:
if self.chr0 == Some('.') || self.at_exponent() {
// Take '.':
if self.chr0 == Some('.') {
value_text.push(self.next_char().unwrap());
while let Some(c) = self.take_number(10) {
value_text.push(c);
}
value_text.push_str(&self.radix_run(10));
}

// 1e6 for example:
Expand All @@ -389,9 +373,7 @@ where
value_text.push(self.next_char().unwrap());
}

while let Some(c) = self.take_number(10) {
value_text.push(c);
}
value_text.push_str(&self.radix_run(10));
}

let value = f64::from_str(&value_text).unwrap();
Expand Down Expand Up @@ -426,6 +408,57 @@ where
}
}

/// Consume a run of digits in the given radix. Digits may be separated
/// by underscores, which are skipped, so '1_2_3_4' lexes as "1234".
fn radix_run(&mut self, radix: u32) -> String {
    let mut digits = String::new();
    loop {
        match self.take_number(radix) {
            Some(digit) => digits.push(digit),
            None => {
                // Swallow an underscore only when another digit follows it;
                // anything else (including a doubled '_' as in '1__2', or a
                // trailing one) terminates the run right here.
                if self.chr0 == Some('_') && Lexer::<T>::is_digit_of_radix(self.chr1, radix) {
                    self.next_char();
                } else {
                    break;
                }
            }
        }
    }
    digits
}

/// Consume and return the next character iff it is a valid digit in `radix`.
fn take_number(&mut self, radix: u32) -> Option<char> {
    if Lexer::<T>::is_digit_of_radix(self.chr0, radix) {
        // chr0 is known Some here — presumably next_char() yields exactly
        // that character, so the unwrap cannot fail.
        Some(self.next_char().unwrap())
    } else {
        None
    }
}

/// Test whether `c` is a valid digit in the given radix.
///
/// Panics (via `unimplemented!`) for any radix other than 2, 8, 10 or 16.
fn is_digit_of_radix(c: Option<char>, radix: u32) -> bool {
    // The nested bool-returning matches are collapsed with `matches!`
    // (clippy: match_like_matches_macro) — same semantics, less noise.
    match radix {
        2 => matches!(c, Some('0'..='1')),
        8 => matches!(c, Some('0'..='7')),
        10 => matches!(c, Some('0'..='9')),
        16 => matches!(c, Some('0'..='9') | Some('a'..='f') | Some('A'..='F')),
        other => unimplemented!("Radix not implemented: {}", other),
    }
}

/// Test if we face '[eE][-+]?[0-9]+'
fn at_exponent(&self) -> bool {
match self.chr0 {
Expand Down Expand Up @@ -626,34 +659,6 @@ where
}
}

fn take_number(&mut self, radix: u32) -> Option<char> {
let take_char = match radix {
2 => match self.chr0 {
Some('0'..='1') => true,
_ => false,
},
8 => match self.chr0 {
Some('0'..='7') => true,
_ => false,
},
10 => match self.chr0 {
Some('0'..='9') => true,
_ => false,
},
16 => match self.chr0 {
Some('0'..='9') | Some('a'..='f') | Some('A'..='F') => true,
_ => false,
},
x => unimplemented!("Radix not implemented: {}", x),
};

if take_char {
Some(self.next_char().unwrap())
} else {
None
}
}

/// This is the main entry point. Call this function to retrieve the next token.
/// This function is used by the iterator implementation.
fn inner_next(&mut self) -> LexResult {
Expand Down
6 changes: 3 additions & 3 deletions parser/src/python.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ ImportDots: usize = {

ImportAsNames: Vec<ast::ImportSymbol> = {
<i:OneOrMore<ImportAsAlias<Identifier>>> => i,
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ")" => i,
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
"*" => {
// Star import all
vec![ast::ImportSymbol { symbol: "*".to_string(), alias: None }]
Expand Down Expand Up @@ -952,11 +952,11 @@ Atom: ast::Expression = {
};

ListLiteralValues: Vec<ast::Expression> = {
<e:OneOrMore<TestOrStarExpr>> <_trailing_comma:","?> => e,
<e:OneOrMore<TestOrStarExpr>> ","? => e,
};

DictLiteralValues: Vec<(Option<ast::Expression>, ast::Expression)> = {
<elements:OneOrMore<DictElement>> <_trailing_comma:","?> => elements,
<elements:OneOrMore<DictElement>> ","? => elements,
};

DictEntry: (ast::Expression, ast::Expression) = {
Expand Down
11 changes: 11 additions & 0 deletions tests/snippets/numbers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from testutils import assertRaises

x = 5
x.__init__(6)
assert x == 5
Expand Down Expand Up @@ -42,3 +44,12 @@ class A(int):
assert int(1).__rxor__(1) == 0
assert int(3).__rxor__(-3) == -2
assert int(3).__rxor__(4) == 7

# Test underscores in numbers:
assert 1_2 == 12
assert 1_2_3 == 123
assert 1_2.3_4 == 12.34
assert 1_2.3_4e0_0 == 12.34

with assertRaises(SyntaxError):
eval('1__2')