-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Updated the re library + test #6648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
394fccf
Updated re library + test
terryluan12 c6b5b69
Copied over generate_sre_constants from cpython/Tools
terryluan12 468d314
Customized `generate_sre_constants.py` + ran to update `constants.rs`
terryluan12 53a290a
Annotated failing/erroring tests in test_re & test_async_case + remov…
terryluan12 1c5eb43
Clarified `dump_enum` docstring in `generate_sre_constants.py`
terryluan12 e4bc05c
Auto-format: cargo fmt --all
github-actions[bot] ac5b298
Merge branch 'main' into update_re
terryluan12 583e58b
Clean up generate_sre_constants.py by removing comments
terryluan12 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Customized `generate_sre_constants.py` + ran to update `constants.rs`
- Loading branch information
commit 468d314c5277c8499fad48c32bd809a8a364dc85
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,7 @@ | ||
| #! /usr/bin/env python3 | ||
| # This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py. | ||
| # This script generates crates/sre_engine/src/constants.rs from Lib/re/_constants.py. | ||
|
|
||
| SCRIPT_NAME = 'scripts/generate_sre_constants.py' | ||
|
|
||
|
|
||
| def update_file(file, content): | ||
|
|
@@ -13,13 +15,13 @@ def update_file(file, content): | |
| fobj.write(content) | ||
| return True | ||
|
|
||
| sre_constants_header = """\ | ||
| sre_constants_header = f"""\ | ||
| /* | ||
| * Secret Labs' Regular Expression Engine | ||
| * | ||
| * regular expression matching engine | ||
| * | ||
| * Auto-generated by Tools/scripts/generate_sre_constants.py from | ||
| * Auto-generated by {SCRIPT_NAME} from | ||
| * Lib/re/_constants.py. | ||
| * | ||
| * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. | ||
|
|
@@ -29,50 +31,128 @@ def update_file(file, content): | |
|
|
||
| """ | ||
|
|
||
def dump_enum(d, enum_name, derives, strip_prefix=""):
    """Generate a Rust ``#[repr(u32)]`` enum definition from a collection of constants.

    Args:
        d: An iterable of sortable constants. ``str(item)`` supplies the
            variant name. Iterating a dict yields its keys.
        enum_name (str): The name of the enum to generate.
        derives (str): The derive attribute line to place above the enum.
            Skipped entirely when empty.
        strip_prefix (str, optional): A prefix to strip from the variant names.
            Defaults to "".

    Returns:
        list: A list of strings that, joined together, form the enum definition.
    """
    content = []
    if derives:
        # Avoid emitting a stray blank line when no derive attribute is wanted.
        content.append(f"{derives}\n")
    content.append("#[repr(u32)]\n")
    content.append("#[allow(non_camel_case_types, clippy::upper_case_acronyms)]\n")
    content.append(f"pub enum {enum_name} {{\n")
    for index, item in enumerate(sorted(d)):
        name = str(item).removeprefix(strip_prefix)
        # When the constant is itself an integer, emit its real value so the
        # Rust discriminant cannot drift from the Python constant if the
        # values are ever non-contiguous. Otherwise fall back to sort order.
        value = int(item) if isinstance(item, int) else index
        content.append(f" {name} = {value},\n")
    content.append("}\n\n")
    return content
|
|
||
|
|
||
def dump_bitflags(d, prefix, derives, struct_name, int_t):
    """Generate a Rust ``bitflags!`` definition from a mapping of names to values.

    Only entries whose key starts with ``prefix`` are emitted. They are ordered
    by value (then by name), and the prefix is stripped from each flag name.

    Args:
        d (dict): Mapping of constant names to their values.
        prefix (str): The prefix that selects entries and is stripped from
            the flag names.
        derives (str): The derive attribute line to include. Skipped when empty.
        struct_name (str): The name of the struct to generate.
        int_t (str): The integer type to use for the bitflags.

    Returns:
        list: A list of strings that, joined together, form the bitflags definition.
    """
    # (value, name) pairs so that sorting orders the flags by numeric value.
    selected = sorted(
        (value, name) for name, value in d.items() if name.startswith(prefix)
    )
    content = ["bitflags! {\n"]
    if derives:
        content.append(f"{derives}\n")
    content.append(f" pub struct {struct_name}: {int_t} {{\n")
    content.extend(
        f" const {str(name).removeprefix(prefix)} = {value};\n"
        for value, name in selected
    )
    content.append(" }\n")
    content.append("}\n\n")
    return content
|
|
||
| def main( | ||
| infile="Lib/re/_constants.py", | ||
| outfile_constants="Modules/_sre/sre_constants.h", | ||
| outfile_targets="Modules/_sre/sre_targets.h", | ||
| outfile_constants="crates/sre_engine/src/constants.rs", | ||
| ): | ||
| ns = {} | ||
| with open(infile) as fp: | ||
| code = fp.read() | ||
| exec(code, ns) | ||
|
|
||
| def dump(d, prefix): | ||
| items = sorted(d) | ||
| for item in items: | ||
| yield "#define %s_%s %d\n" % (prefix, item, item) | ||
|
|
||
| def dump2(d, prefix): | ||
| items = [(value, name) for name, value in d.items() | ||
| if name.startswith(prefix)] | ||
| for value, name in sorted(items): | ||
| yield "#define %s %d\n" % (name, value) | ||
|
|
||
| def dump_gotos(d, prefix): | ||
| for i, item in enumerate(sorted(d)): | ||
| assert i == item | ||
| yield f" &&{prefix}_{item},\n" | ||
|
|
||
| content = [sre_constants_header] | ||
| content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"]) | ||
| content.extend(dump(ns["OPCODES"], "SRE_OP")) | ||
| content.extend(dump(ns["ATCODES"], "SRE")) | ||
| content.extend(dump(ns["CHCODES"], "SRE")) | ||
| content.extend(dump2(ns, "SRE_FLAG_")) | ||
| content.extend(dump2(ns, "SRE_INFO_")) | ||
|
|
||
| update_file(outfile_constants, ''.join(content)) | ||
| content.append("use bitflags::bitflags;\n\n") | ||
| content.append(f"pub const SRE_MAGIC: usize = {ns['MAGIC']};\n") | ||
| content.extend(dump_enum(ns["OPCODES"], "SreOpcode", "#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]")) | ||
| content.extend(dump_enum(ns["ATCODES"], "SreAtCode", "#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]", "AT_")) | ||
| content.extend(dump_enum(ns["CHCODES"], "SreCatCode", "#[derive(num_enum::TryFromPrimitive, Debug)]", "CATEGORY_")) | ||
|
|
||
| content = [sre_constants_header] | ||
| content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n") | ||
| content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP")) | ||
| content.append("};\n") | ||
| content.extend(dump_bitflags(ns, "SRE_FLAG_", "#[derive(Debug, PartialEq, Eq, Clone, Copy)]", "SreFlag", "u16")) | ||
| content.extend(dump_bitflags(ns, "SRE_INFO_", "", "SreInfo", "u32")) | ||
|
|
||
| update_file(outfile_targets, ''.join(content)) | ||
| update_file(outfile_constants, ''.join(content)) | ||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| import sys | ||
| main(*sys.argv[1:]) | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| # dump(f, OPCODES, "SreOpcode", "u32", "") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these lines supposed to be deleted?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ... yep. They were deleted in a commit which I believe was lost to the ether on rebase lol |
||
| # dump(f, ATCODES, "SreAtCode", "u32", "AT_") | ||
| # dump(f, CHCODES, "SreCatCode", "u32", "CATEGORY_") | ||
|
|
||
| # def bitflags(typ, int_t, prefix, flags): | ||
| # f.write(f"""\ | ||
| # bitflags! {{ | ||
| # pub struct {typ}: {int_t} {{ | ||
| # """) | ||
| # for name in flags: | ||
| # val = globals()[prefix + name] | ||
| # f.write(f" const {name} = {val};\n") | ||
| # f.write("""\ | ||
| # } | ||
| # } | ||
| # """) | ||
|
|
||
| # bitflags("SreFlag", "u16", "SRE_FLAG_", [ | ||
| # "TEMPLATE", | ||
| # "IGNORECASE", | ||
| # "LOCALE", | ||
| # "MULTILINE", | ||
| # "DOTALL", | ||
| # "UNICODE", | ||
| # "VERBOSE", | ||
| # "DEBUG", | ||
| # "ASCII", | ||
| # ]) | ||
|
|
||
| # bitflags("SreInfo", "u32", "SRE_INFO_", [ | ||
| # "PREFIX", "LITERAL", "CHARSET", | ||
| # ]) | ||
|
|
||
| # print("done") | ||
|
|
||
|
|
||
|
|
||
|
|
||
| # if __name__ == "__main__": | ||
| # import sys | ||
| # if len(sys.argv) > 1: | ||
| # constants_file = sys.argv[1] | ||
| # else: | ||
| # import os | ||
| # constants_file = os.path.join(os.path.dirname(__file__), "../../sre-engine/src/constants.rs") | ||
| # with open(constants_file, "w") as f: | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.