Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Customized generate_sre_constants.py + ran to update constants.rs
  • Loading branch information
terryluan12 committed Jan 5, 2026
commit 468d314c5277c8499fad48c32bd809a8a364dc85
14 changes: 9 additions & 5 deletions crates/sre_engine/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
* Auto-generated by scripts/generate_sre_constants.py from
* Lib/re/_constants.py.
Comment thread
terryluan12 marked this conversation as resolved.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
* See the sre.c file for information on usage and redistribution.
*/

use bitflags::bitflags;

pub const SRE_MAGIC: usize = 20221023;
pub const SRE_MAGIC: usize = 20230612;
Comment thread
terryluan12 marked this conversation as resolved.
#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]
#[repr(u32)]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
Expand Down Expand Up @@ -62,6 +62,7 @@ pub enum SreOpcode {
NOT_LITERAL_UNI_IGNORE = 41,
RANGE_UNI_IGNORE = 42,
}

#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]
#[repr(u32)]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
Expand All @@ -79,6 +80,7 @@ pub enum SreAtCode {
UNI_BOUNDARY = 10,
UNI_NON_BOUNDARY = 11,
}

#[derive(num_enum::TryFromPrimitive, Debug)]
#[repr(u32)]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
Expand All @@ -102,10 +104,10 @@ pub enum SreCatCode {
UNI_LINEBREAK = 16,
UNI_NOT_LINEBREAK = 17,
}

bitflags! {
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct SreFlag: u16 {
const TEMPLATE = 1;
const IGNORECASE = 2;
const LOCALE = 4;
const MULTILINE = 8;
Expand All @@ -116,10 +118,12 @@ bitflags! {
const ASCII = 256;
}
}

// Bit-flag set backed by `u32`, with one constant per `SRE_INFO_*` name
// (PREFIX = 1, LITERAL = 2, CHARSET = 4).
// NOTE(review): this file's header says it is auto-generated by
// scripts/generate_sre_constants.py, so manual edits here are presumably
// overwritten on regeneration - confirm before hand-editing.
bitflags! {
pub struct SreInfo: u32 {
const PREFIX = 1;
const LITERAL = 2;
const CHARSET = 4;
}
}

148 changes: 114 additions & 34 deletions scripts/generate_sre_constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#! /usr/bin/env python3
# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py.
# This script generates crates/sre_engine/src/constants.rs from Lib/re/_constants.py.

SCRIPT_NAME = 'scripts/generate_sre_constants.py'


def update_file(file, content):
Expand All @@ -13,13 +15,13 @@ def update_file(file, content):
fobj.write(content)
return True

sre_constants_header = """\
sre_constants_header = f"""\
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* Auto-generated by Tools/scripts/generate_sre_constants.py from
* Auto-generated by {SCRIPT_NAME} from
* Lib/re/_constants.py.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Expand All @@ -29,50 +31,128 @@ def update_file(file, content):

"""

def dump_enum(d, enum_name, derives, strip_prefix=""):
    """Render the members of *d* as the source lines of a Rust enum.

    The members are sorted, and each one becomes a variant whose
    discriminant is its position in that sorted order (0, 1, 2, ...),
    not any value carried by the member itself.

    Args:
        d: Iterable of enum members. Iterating a dict yields its keys.
        enum_name (str): Name of the generated Rust enum.
        derives (str): The ``#[derive(...)]`` line placed above the enum.
        strip_prefix (str, optional): Prefix removed from each member's
            string form to obtain the variant name. Defaults to "".

    Returns:
        list: Source fragments which, concatenated, form the enum
        definition followed by a blank line.
    """
    ordered = sorted(d)
    lines = [
        f"{derives}\n",
        "#[repr(u32)]\n",
        "#[allow(non_camel_case_types, clippy::upper_case_acronyms)]\n",
        f"pub enum {enum_name} {{\n",
    ]
    # One variant per member, numbered by sorted position.
    lines.extend(
        f" {str(member).removeprefix(strip_prefix)} = {position},\n"
        for position, member in enumerate(ordered)
    )
    lines.append("}\n\n")
    return lines


def dump_bitflags(d, prefix, derives, struct_name, int_t):
    """Generate a Rust ``bitflags!`` definition from a Python dictionary.

    Every entry of *d* whose key starts with *prefix* becomes one
    ``const`` in the generated struct. Entries are emitted in ascending
    order of value (ties broken by name), and *prefix* is stripped from
    each name.

    Args:
        d (dict): Mapping of constant names to their values. Entries
            whose name does not start with *prefix* are ignored.
        prefix (str): Prefix that selects the entries and is stripped
            from the emitted constant names.
        derives (str): The derive attribute line to place above the
            struct. If empty, no attribute line is emitted.
        struct_name (str): The name of the struct to generate.
        int_t (str): The integer type backing the bitflags.

    Returns:
        list: Source fragments which, concatenated, form the
        ``bitflags!`` block followed by a blank line.
    """
    # Pair as (value, name) so the sort orders by value first.
    items = sorted(
        (value, name) for name, value in d.items() if name.startswith(prefix)
    )
    content = ["bitflags! {\n"]
    if derives:
        content.append(f"{derives}\n")
    content.append(f" pub struct {struct_name}: {int_t} {{\n")
    content.extend(
        f" const {name.removeprefix(prefix)} = {value};\n"
        for value, name in items
    )
    content.append(" }\n")
    content.append("}\n\n")
    return content

def main(
    infile="Lib/re/_constants.py",
    outfile_constants="crates/sre_engine/src/constants.rs",
):
    """Regenerate the Rust SRE constants file from the Python constants.

    Executes *infile* to obtain its module namespace, then renders the
    magic number, the opcode / at-code / category-code enums and the
    ``SRE_FLAG_*`` / ``SRE_INFO_*`` bitflags as Rust source, and hands
    the result to ``update_file``.

    Args:
        infile (str): Path of the Python constants module to read.
        outfile_constants (str): Path of the Rust file to write.
    """
    ns = {}
    with open(infile, encoding="utf-8") as fp:
        code = fp.read()
    # NOTE(review): exec() runs the input file as code; only point this
    # script at a trusted constants module.
    exec(code, ns)

    content = [sre_constants_header]
    content.append("use bitflags::bitflags;\n\n")
    content.append(f"pub const SRE_MAGIC: usize = {ns['MAGIC']};\n")
    content.extend(dump_enum(
        ns["OPCODES"],
        "SreOpcode",
        "#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]",
    ))
    content.extend(dump_enum(
        ns["ATCODES"],
        "SreAtCode",
        "#[derive(num_enum::TryFromPrimitive, Debug, PartialEq, Eq)]",
        "AT_",
    ))
    content.extend(dump_enum(
        ns["CHCODES"],
        "SreCatCode",
        "#[derive(num_enum::TryFromPrimitive, Debug)]",
        "CATEGORY_",
    ))

    content.extend(dump_bitflags(
        ns,
        "SRE_FLAG_",
        "#[derive(Debug, PartialEq, Eq, Clone, Copy)]",
        "SreFlag",
        "u16",
    ))
    content.extend(dump_bitflags(ns, "SRE_INFO_", "", "SreInfo", "u32"))

    update_file(outfile_constants, ''.join(content))


if __name__ == '__main__':
import sys
main(*sys.argv[1:])





# dump(f, OPCODES, "SreOpcode", "u32", "")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these lines supposed to be deleted?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... yep. They were deleted in a commit which I believe was lost to the ether on rebase lol

# dump(f, ATCODES, "SreAtCode", "u32", "AT_")
# dump(f, CHCODES, "SreCatCode", "u32", "CATEGORY_")

# def bitflags(typ, int_t, prefix, flags):
# f.write(f"""\
# bitflags! {{
# pub struct {typ}: {int_t} {{
# """)
# for name in flags:
# val = globals()[prefix + name]
# f.write(f" const {name} = {val};\n")
# f.write("""\
# }
# }
# """)

# bitflags("SreFlag", "u16", "SRE_FLAG_", [
# "TEMPLATE",
# "IGNORECASE",
# "LOCALE",
# "MULTILINE",
# "DOTALL",
# "UNICODE",
# "VERBOSE",
# "DEBUG",
# "ASCII",
# ])

# bitflags("SreInfo", "u32", "SRE_INFO_", [
# "PREFIX", "LITERAL", "CHARSET",
# ])

# print("done")




# if __name__ == "__main__":
# import sys
# if len(sys.argv) > 1:
# constants_file = sys.argv[1]
# else:
# import os
# constants_file = os.path.join(os.path.dirname(__file__), "../../sre-engine/src/constants.rs")
# with open(constants_file, "w") as f: