diff --git a/.gitignore b/.gitignore index 8b3e244..4cb3c8a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ composer.lock run-cache.php .phpunit.result.cache -.php-cs.fixer.cache +.php-cs-fixer.cache phpunit.xml .note.md diff --git a/.php-cs-fixer.cache b/.php-cs-fixer.cache deleted file mode 100644 index c6a6c15..0000000 --- a/.php-cs-fixer.cache +++ /dev/null @@ -1 +0,0 @@ -{"php":"8.2.9","version":"3.24.0","indent":" ","lineEnding":"\n","rules":{"binary_operator_spaces":{"default":"at_least_single_space"},"blank_line_after_opening_tag":true,"blank_line_between_import_groups":true,"blank_lines_before_namespace":true,"class_definition":{"inline_constructor_arguments":false,"space_before_parenthesis":true},"compact_nullable_typehint":true,"curly_braces_position":{"allow_single_line_empty_anonymous_classes":true},"declare_equal_normalize":true,"lowercase_cast":true,"lowercase_static_reference":true,"new_with_braces":true,"no_blank_lines_after_class_opening":true,"no_leading_import_slash":true,"no_whitespace_in_blank_line":true,"ordered_class_elements":{"order":["use_trait"]},"ordered_imports":{"imports_order":["class","function","const"],"sort_algorithm":"none"},"return_type_declaration":true,"short_scalar_cast":true,"single_import_per_statement":{"group_to_single_imports":false},"single_trait_insert_per_statement":true,"ternary_operator_spaces":true,"visibility_required":true,"blank_line_after_namespace":true,"constant_case":true,"control_structure_braces":true,"control_structure_continuation_position":true,"elseif":true,"function_declaration":true,"indentation_type":true,"line_ending":true,"lowercase_keywords":true,"method_argument_space":{"on_multiline":"ensure_fully_multiline"},"no_break_comment":true,"no_closing_tag":true,"no_multiple_statements_per_line":true,"no_space_around_double_colon":true,"no_spaces_after_function_name":true,"no_trailing_whitespace":true,"no_trailing_whitespace_in_comment":true,"single_blank_line_at_eof":true,"single_class_element_per_statement":{"elements":["property"]},"single_line_after_imports":true,"spaces_inside_parentheses":true,"statement_indentation":true,"switch_case_semicolon_to_colon":true,"switch_case_space":true,"encoding":true,"full_opening_tag":true},"hashes":{"src\/Trait\/HasSelectorProperty.php":"8a2ea3e5316cac3317aeb4ab0c102d8e","src\/Trait\/HasAliasProperty.php":"e0a48b0bf89bfad94338a3453d213482","src\/Trait\/HasSourceProperty.php":"10ca30063c79170babfbf9cf2b1115a6","src\/Trait\/HasNodeProperty.php":"2ad45bd8aad43e1161349f6424722dea","src\/Trait\/HasRawProperty.php":"1d2749898a1f5013414609c07645bd89","src\/Trait\/HasCallbackProperty.php":"753ed7db315135f29114ac0a44191f82","src\/Trait\/HasOperatorProperty.php":"2820034730238d984aa086c3ae3728b6","src\/RegisterAdapter.php":"2280c1e2da84ca7e5713513bd426f8a1","src\/CallbackAdapter.php":"86aa99d17a05f7d5330835351f40f995","src\/Source.php":"3ba79634b9d506c415718849d2d8af4c","src\/Adapter\/DefaultCallbackAdapter.php":"8ada7e634b6d02e864f6a96790f45b8d","src\/Adapter\/ClosureCallbackAdapter.php":"c7cb28c2115265e6bf5a1cc26178aa8a","src\/Adapter\/AttributeCallbackAdapter.php":"9cbf992f1d677c5d70d4efc1c2e11109","src\/Adapter\/UpperCallbackAdapter.php":"4cfed9bf9fc856345a62ed809b635a33","src\/Adapter\/LowerCallbackAdapter.php":"034176fe6cac587fb19d1d4580779aea","src\/Adapter\/ReverseCallbackAdapter.php":"9153744dee7eefcef28922f5143bea98","src\/Adapter\/AppendNodeCallbackAdapter.php":"afeb24e1702d7ce72f3d92c3b4413a28","src\/Adapter\/LengthCallbackAdapter.php":"3409544679e32175b096c43bdc429a10","src\/Adapter\/ReplaceCallbackAdapter.php":"03e54dbc90ff70f99aaae2ef92eef88e","src\/Definer.php":"605b9e4ba7118de6e50780b60d65b8ed","src\/Parser.php":"89b831205b2a2cde933bab662d583d48","src\/Loader.php":"2dc6678e10448d308c1d415ac281836d","src\/DefinerExtractor.php":"4bb1cd8f8cd040d5be918a6d8c0061cc","src\/Loader\/DOMCrawlerLoader.php":"8aba0161c65863a40637488d49ec7970","src\/CqueryException.php":"83c5a32d1b73811297b6fdb2d07183ec","src\/Cquery.php":"8cb1419150a9feb43f79fcf5f745a008","src\/Support\/Collection.php":"49e18301b12a058d4b8fc396ca47d404","src\/Support\/RegExp.php":"5739693554a6e3d8dc77e1c0c7f4a8eb","src\/Support\/Str.php":"51f52dfad7bb5f3bf683123498b4345d","src\/Filter.php":"f61b19efce20a7dbbc924c4d4bf544fa","tests\/FilterCqueryTest.php":"0e01f835fd15e1fe6ea62060278238dd","tests\/SampleTest.php":"0609dc3489806e9ddec1209997e599ad","tests\/DefinerTest.php":"bc52332ba144ff4f2597749f8b67cac4","tests\/SourceTest.php":"d6c2f64db61d906c498519341e83ad9a","tests\/CadillacCarDatabaseTest.php":"5d142cebd6c3e6e7ffee039658aaf58c"}} \ No newline at end of file diff --git a/README.md b/README.md index 7b0b7ba..cc86bcc 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ For example, you have a simple HTML element as shown below. -### List function available +### List definer available Below are the functions you are can use, they may change over time.
**Note:** nested function has been supported. | function | example | description | diff --git a/composer.json b/composer.json index 7868dca..e05b5f8 100644 --- a/composer.json +++ b/composer.json @@ -36,7 +36,8 @@ "symfony/http-client": "^5.4|^6.3", "symfony/deprecation-contracts": "^2.5|^3.4", "symfony/dom-crawler": "5.4|^6.3", - "doctrine/collections": "^1.8|^2.1|^3.0" + "doctrine/collections": "^1.8|^2.1|^3.0", + "cocur/slugify": "dev-main" }, "require-dev": { "phpunit/phpunit": "^8.0|^9.0|^10.0", diff --git a/src/Cquery.php b/src/Cquery.php index a81b73e..ab427e0 100644 --- a/src/Cquery.php +++ b/src/Cquery.php @@ -33,13 +33,20 @@ class Cquery */ private $loader; + /** + * A variable used to store the results of a query + * + * @var \Doctrine\Common\Collections\ArrayCollection + * + * The default results is null + */ private $results; /** * Create a new Cquery instance. * - * @param \DOMNodeList|\DOMNode|string|null $source A source to use as the the source data, u can put html - * content/url page to scrape default is null + * @param \DOMNodeList|\DOMNode|string|null $source A source to use as the the source data + * u can put html content/url page to scrape default is null * * @param string $contentType Type of Data Content to be Used as Data Source default is 'html' */ @@ -58,32 +65,32 @@ public function __construct(string $source = null, $contentType = "html") } /** - * Adds a definer to the current source. - * + * Adds a source based on data given. * This method is used to determine the HTML element selector * that will serve as a property in each array element. * - * @param \Cacing69\Cquery\Definer|string $picks a selector to grab on element + * @param string $value set a source element selector to activate query * @return \Cacing69\Cquery\Cquery - * @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect." */ - public function define(...$defines): Cquery + public function from(string $value) { - $this->loader->define(...$defines); + $this->loader->from($value); return $this; } /** - * Adds a source based on data given. + * Adds a definer to the current source. + * * This method is used to determine the HTML element selector * that will serve as a property in each array element. * - * @param string $value set a source element selector to activate query + * @param \Cacing69\Cquery\Definer|string $picks a selector to grab on element * @return \Cacing69\Cquery\Cquery + * @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect." */ - public function from(string $value) + public function define(...$defines): Cquery { - $this->loader->from($value); + $this->loader->define(...$defines); return $this; } @@ -102,7 +109,7 @@ public function limit(int $limit) } /** - * Take a first reesult from result collection + * Take a first result from result collection * * @return array */ @@ -147,7 +154,7 @@ public function orFilter($node, $operator = null, $value = null): Cquery } /** - * Take a result from query + * Take a result query from loader * * @return ArrayCollection */ @@ -188,38 +195,7 @@ public function getSource() public function client($clientType) { - // $this->loader->setClientType($clientType); - + $this->loader->setClientType($clientType); return $this; } - - public static function getAsync($results, $chunk) - { - $loop = Loop::get(); - $client = new Browser($loop); - $results = array_chunk($results, 25); - - foreach ($results as $key => $_chunks) { - foreach ($_chunks as $_key => $_result) { - $client - // ->withHeader("Key", "value") - // ->withHeader("Key", "value") - ->get($_result["url"]) - ->then(function (ResponseInterface $response) use (&$results, $key, $_key) { - $detail = new Cquery((string) $response->getBody()); - - $resultDetail = $detail - ->from(".spec") - ->define( - ".specleft tr:nth-child(1) > td.data as price" - ) - ->first(); - $results[$key][$_key]["price"] = $resultDetail["price"]; - }); - } - $loop->run(); - } - - return array_merge(...$results); - } } diff --git a/src/Loader.php b/src/Loader.php index ff10abe..41579bf 100644 --- a/src/Loader.php +++ b/src/Loader.php @@ -16,8 +16,8 @@ abstract class Loader { use HasSourceProperty; protected $limit = null; - protected $clientType = "browser-kit"; protected $client; + protected $clientType = "browser-kit"; protected $uri = null; protected $isRemote = false; @@ -238,6 +238,12 @@ public function setCallbackCompose(Closure $closure) return $this; } + public function setClientType(string $clientType) + { + $this->clientType = $clientType; + return $this; + } + public function getResults() { return $this->results; diff --git a/src/RegisterAdapter.php b/src/RegisterAdapter.php index acaa146..b3813c4 100644 --- a/src/RegisterAdapter.php +++ b/src/RegisterAdapter.php @@ -11,6 +11,14 @@ use Cacing69\Cquery\Adapter\ReverseCallbackAdapter; use Cacing69\Cquery\Adapter\UpperCallbackAdapter; +/** + * RegisterAdapter used to register available adapters, this adapter is utilized during create definer and filter. + * + * @author Ibnul Mutaki + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ class RegisterAdapter { public static function load() diff --git a/src/Source.php b/src/Source.php index 0f255bd..43494a5 100644 --- a/src/Source.php +++ b/src/Source.php @@ -9,6 +9,14 @@ use Cacing69\Cquery\Trait\HasRawProperty; use Symfony\Component\CssSelector\CssSelectorConverter; +/** + * Source class used to define the source element to be scraped + * + * @author Ibnul Mutaki + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ class Source { use HasAliasProperty; diff --git a/src/Support/Str.php b/src/Support/Str.php index 28270fe..8b08bef 100644 --- a/src/Support/Str.php +++ b/src/Support/Str.php @@ -2,72 +2,15 @@ namespace Cacing69\Cquery\Support; +use Cocur\Slugify\Slugify; + class Str { - // https://stackoverflow.com/a/2955521/10232729 - public static function slug($text, $divider = "_"): string + public static function slug($text): string { - $replace = [ - '<' => '', '>' => '', '-' => ' ', '&' => '', '.' => '', - '"' => '', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', - 'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ą' => 'A', 'Ă' => 'A', 'Æ' => 'Ae', - 'Ç' => 'C', 'Ć' => 'C', 'Č' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C', 'Ď' => 'D', 'Đ' => 'D', - 'Ð' => 'D', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E', - 'Ę' => 'E', 'Ě' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ĝ' => 'G', 'Ğ' => 'G', - 'Ġ' => 'G', 'Ģ' => 'G', 'Ĥ' => 'H', 'Ħ' => 'H', 'Ì' => 'I', 'Í' => 'I', - 'Î' => 'I', 'Ï' => 'I', 'Ī' => 'I', 'Ĩ' => 'I', 'Ĭ' => 'I', 'Į' => 'I', - 'İ' => 'I', 'IJ' => 'IJ', 'Ĵ' => 'J', 'Ķ' => 'K', 'Ł' => 'K', 'Ľ' => 'K', - 'Ĺ' => 'K', 'Ļ' => 'K', 'Ŀ' => 'K', 'Ñ' => 'N', 'Ń' => 'N', 'Ň' => 'N', - 'Ņ' => 'N', 'Ŋ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', - 'Ö' => 'Oe', 'Ø' => 'O', 'Ō' => 'O', 'Ő' => 'O', 'Ŏ' => 'O', - 'Œ' => 'OE', 'Ŕ' => 'R', 'Ř' => 'R', 'Ŗ' => 'R', 'Ś' => 'S', 'Š' => 'S', - 'Ş' => 'S', 'Ŝ' => 'S', 'Ș' => 'S', 'Ť' => 'T', 'Ţ' => 'T', 'Ŧ' => 'T', - 'Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'Ue', 'Ū' => 'U', - 'Ů' => 'U', 'Ű' => 'U', 'Ŭ' => 'U', 'Ũ' => 'U', 'Ų' => 'U', - 'Ŵ' => 'W', 'Ý' => 'Y', 'Ŷ' => 'Y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'Ž' => 'Z', - 'Ż' => 'Z', 'Þ' => 'T', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', - 'ä' => 'ae', 'å' => 'a', 'ā' => 'a', 'ą' => 'a', 'ă' => 'a', - 'æ' => 'ae', 'ç' => 'c', 'ć' => 'c', 'č' => 'c', 'ĉ' => 'c', 'ċ' => 'c', - 'ď' => 'd', 'đ' => 'd', 'ð' => 'd', 'è' => 'e', 'é' => 'e', 'ê' => 'e', - 'ë' => 'e', 'ē' => 'e', 'ę' => 'e', 'ě' => 'e', 'ĕ' => 'e', 'ė' => 'e', - 'ƒ' => 'f', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g', 'ģ' => 'g', 'ĥ' => 'h', - 'ħ' => 'h', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ī' => 'i', - 'ĩ' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'ij' => 'ij', 'ĵ' => 'j', - 'ķ' => 'k', 'ĸ' => 'k', 'ł' => 'l', 'ľ' => 'l', 'ĺ' => 'l', 'ļ' => 'l', - 'ŀ' => 'l', 'ñ' => 'n', 'ń' => 'n', 'ň' => 'n', 'ņ' => 'n', 'ʼn' => 'n', - 'ŋ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'oe', - 'ø' => 'o', 'ō' => 'o', 'ő' => 'o', 'ŏ' => 'o', 'œ' => 'oe', - 'ŕ' => 'r', 'ř' => 'r', 'ŗ' => 'r', 'š' => 's', 'ù' => 'u', 'ú' => 'u', - 'û' => 'u', 'ū' => 'u', 'ü' => 'ue', 'ů' => 'u', 'ű' => 'u', - 'ŭ' => 'u', 'ũ' => 'u', 'ų' => 'u', 'ŵ' => 'w', 'ý' => 'y', 'ÿ' => 'y', - 'ŷ' => 'y', 'ž' => 'z', 'ż' => 'z', 'ź' => 'z', 'þ' => 't', 'ß' => 'ss', - 'ſ' => 'ss', 'ый' => 'iy', 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', - 'Д' => 'D', 'Е' => 'E', 'Ё' => 'YO', 'Ж' => 'ZH', 'З' => 'Z', 'И' => 'I', - 'Й' => 'Y', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O', - 'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', - 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'CH', 'Ш' => 'SH', 'Щ' => 'SCH', 'Ъ' => '', - 'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'YU', 'Я' => 'YA', 'а' => 'a', - 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo', - 'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k', 'л' => 'l', - 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r', 'с' => 's', - 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c', 'ч' => 'ch', - 'ш' => 'sh', 'щ' => 'sch', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e', - 'ю' => 'yu', 'я' => 'ya' - ]; - - // make a human readable string - $text = strtr($text, $replace); - - // replace non letter or digits by - - $text = preg_replace('~[^\pL\d.]+~u', $divider, $text); - - // trim - $text = trim($text, $divider); - - // remove unwanted characters - $text = preg_replace('~[^-\w.]+~', '', $text); + $slugify = new Slugify(["separator" => "_"]); - return strtolower($text); + return $slugify->slugify($text); } // https://stackoverflow.com/a/33546903