diff --git a/Cargo.lock b/Cargo.lock index 1568efd..18a98ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,6 +33,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" name = "flpc" version = "0.1.0" dependencies = [ + "lazy_static", "pyo3", "regex", ] @@ -49,6 +50,12 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.155" diff --git a/Cargo.toml b/Cargo.toml index e978c83..2556c43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = "0.21.1" regex = "1.10.5" +lazy_static = "1.4.0" \ No newline at end of file diff --git a/README.md b/README.md index 84b501a..cc26a84 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,61 @@
- + flpc logo -# flpc -Star -PyPI - Implementation -GitHub Issues or Pull Requests -PyPI - Downloads + # flpc: Lightning-Fast Python Regex + ![Star](https://img.shields.io/badge/Please%20Give%20A%20Star%20%E2%AD%90-30323D?style=flat-square) + ![PyPI - Implementation](https://img.shields.io/pypi/implementation/flpc?style=flat-square) + ![GitHub Issues](https://img.shields.io/github/issues/itsmeadarsh2008/flpc?style=flat-square) + ![PyPI - Downloads](https://img.shields.io/pypi/dd/flpc?style=flat-square) + ![GitHub License](https://img.shields.io/github/license/itsmeadarsh2008/flpc?style=flat-square) + ![GitHub last commit](https://img.shields.io/github/last-commit/itsmeadarsh2008/flpc?display_timestamp=committer&style=flat-square) -A Rust-based **[regex crate](https://crates.io/crates/regex) wrapper** for Python3 to get faster performance. 👾 -## DEAD-SIMPLE -Just import `flpc` as `re` and use it as you like and the namings are same as the native `re` module. Only thing is that `match` function name of the `re` native library is replaced with `fmatch` - -### MIT Licensed + 🚀 Supercharge your Python regex with Rust-powered performance!
+ +## 🌟 Why flpc? + +flpc is a powerful Python library that wraps the blazing-fast [Rust regex crate](https://crates.io/crates/regex), bringing enhanced speed to your regular expression operations. It's designed to be a near drop-in replacement for Python's native `re` module, with a few minor API differences (listed in the API section below). + +## 🚀 Quick Start + +1. Install flpc: + ``` + pip install flpc + ``` + +2. Import it in place of `re` (for example, `import flpc as re`) and call the functions described in the API section below + +## 🔧 API + +flpc mirrors the `re` module's API, with a few small exceptions: + +- Use `fmatch()` instead of `match()` (to avoid conflicts with Python's keyword) +- When using `group()` on a match object, always provide an index (e.g., `group(0)` for the entire match) + +Common functions include: + +- `compile()` +- `search()` +- `findall()` +- `finditer()` +- `split()` +- `sub()` +- `subn()` + +## 💡 Pro Tips + +- Pre-compile your patterns for faster execution +- Use raw strings (`r''`) for cleaner regex patterns +- Always check if a match is found before accessing groups +- Remember to use `group(0)` to get the entire match + +## 🤝 Contributing + +We welcome contributions! Whether it's bug reports, feature requests, or code contributions, please feel free to reach out. Check our [contribution guidelines](CONTRIBUTING.md) to get started. + +## 📄 License + +flpc is open-source software licensed under the MIT license. \ No newline at end of file diff --git a/examples/helloworld.py b/examples/groups.py similarity index 100% rename from examples/helloworld.py rename to examples/groups.py diff --git a/pyproject.toml b/pyproject.toml index 179c783..f5cd15d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "flpc" -version = "0.1.4" -description = "A Rust-based regex crate wrapper for Python3 to get faster performance. 👾" +version = "0.2.0" +description = "A Lightning Fast ⚡ Rust-based regex crate wrapper for Python3 to get faster performance. 
👾" maintainers = [{ name = "Adarsh Gourab Mahalik", email = "gourabmahalikadarsh@gmail.com" }] readme = "README.md" license = "MIT" diff --git a/src/lib.rs b/src/lib.rs index e8be509..3dffdd4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,9 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use regex::{Captures, Regex, RegexBuilder}; +use std::collections::HashMap; +use std::sync::Mutex; +use lazy_static::lazy_static; #[pyclass] struct Pattern { @@ -32,6 +35,11 @@ struct Constants; #[pyclass] struct Sre; +// Global cache for compiled regex patterns +lazy_static! { + static ref REGEX_CACHE: Mutex> = Mutex::new(HashMap::new()); +} + #[pymethods] impl Match { fn group(&self, idx: usize) -> Option { @@ -61,70 +69,72 @@ impl Match { #[pyfunction] fn compile(pattern: &str, flags: Option) -> PyResult { + let flags = flags.unwrap_or(0); + let mut cache = REGEX_CACHE.lock().unwrap(); + + if let Some(regex) = cache.get(&(pattern.to_string(), flags)) { + return Ok(Pattern { regex: regex.clone() }); + } + let mut builder = RegexBuilder::new(pattern); - if let Some(f) = flags { - if f & 0b0001 != 0 { - builder.case_insensitive(true); - } - if f & 0b0010 != 0 { - builder.multi_line(true); - } - if f & 0b0100 != 0 { - builder.dot_matches_new_line(true); - } - // Add other flags as needed + if flags & 0b0001 != 0 { + builder.case_insensitive(true); } + if flags & 0b0010 != 0 { + builder.multi_line(true); + } + if flags & 0b0100 != 0 { + builder.dot_matches_new_line(true); + } + // Add other flags as needed + let regex = builder .build() .map_err(|e| PyValueError::new_err(e.to_string()))?; + + cache.insert((pattern.to_string(), flags), regex.clone()); Ok(Pattern { regex }) } #[pyfunction] fn search(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(captures) = pattern.regex.captures(text) { + pattern.regex.captures(text).map(|captures| { let mat = captures.get(0).unwrap(); Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, 
captures: unsafe { std::mem::transmute(captures) }, })) - } else { - Ok(None) - } + }).unwrap_or(Ok(None)) } #[pyfunction(name = "fmatch")] fn fmatch(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(captures) = pattern.regex.captures(text) { + pattern.regex.captures(text).and_then(|captures| { let mat = captures.get(0).unwrap(); if mat.start() == 0 { - Ok(Some(Match { + Some(Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, captures: unsafe { std::mem::transmute(captures) }, - })) + }))) } else { - Ok(None) + None } - } else { - Ok(None) - } + }).unwrap_or(Ok(None)) } #[pyfunction] fn fullmatch(pattern: &Pattern, text: &str) -> PyResult> { - if let Some(captures) = pattern.regex.captures(text) { + pattern.regex.captures(text).and_then(|captures| { let mat = captures.get(0).unwrap(); if mat.as_str() == text { - Ok(Some(Match { + Some(Ok(Some(Match { mat: unsafe { std::mem::transmute(mat) }, captures: unsafe { std::mem::transmute(captures) }, - })) + }))) } else { - Ok(None) + None } - } else { - Ok(None) - } + }).unwrap_or(Ok(None)) } #[pyfunction] @@ -175,7 +185,7 @@ fn escape(text: &str) -> PyResult { #[pyfunction] fn purge() -> PyResult<()> { - // Implement cache purge if necessary + REGEX_CACHE.lock().unwrap().clear(); Ok(()) } @@ -190,7 +200,7 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", "0.1.4")?; m.add( "__doc__", - "A Rust-based regex crate wrapper for Python3 to get faster performance. 👾", + "", )?; m.add("__name__", "flpc")?; m.add("__package__", "flpc")?; @@ -224,4 +234,4 @@ fn flpc(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(purge, m)?)?; Ok(()) -} +} \ No newline at end of file