diff --git a/.gitignore b/.gitignore
index 8b3e244..4cb3c8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,6 @@
composer.lock
run-cache.php
.phpunit.result.cache
-.php-cs.fixer.cache
+.php-cs-fixer.cache
phpunit.xml
.note.md
diff --git a/.php-cs-fixer.cache b/.php-cs-fixer.cache
deleted file mode 100644
index c6a6c15..0000000
--- a/.php-cs-fixer.cache
+++ /dev/null
@@ -1 +0,0 @@
-{"php":"8.2.9","version":"3.24.0","indent":" ","lineEnding":"\n","rules":{"binary_operator_spaces":{"default":"at_least_single_space"},"blank_line_after_opening_tag":true,"blank_line_between_import_groups":true,"blank_lines_before_namespace":true,"class_definition":{"inline_constructor_arguments":false,"space_before_parenthesis":true},"compact_nullable_typehint":true,"curly_braces_position":{"allow_single_line_empty_anonymous_classes":true},"declare_equal_normalize":true,"lowercase_cast":true,"lowercase_static_reference":true,"new_with_braces":true,"no_blank_lines_after_class_opening":true,"no_leading_import_slash":true,"no_whitespace_in_blank_line":true,"ordered_class_elements":{"order":["use_trait"]},"ordered_imports":{"imports_order":["class","function","const"],"sort_algorithm":"none"},"return_type_declaration":true,"short_scalar_cast":true,"single_import_per_statement":{"group_to_single_imports":false},"single_trait_insert_per_statement":true,"ternary_operator_spaces":true,"visibility_required":true,"blank_line_after_namespace":true,"constant_case":true,"control_structure_braces":true,"control_structure_continuation_position":true,"elseif":true,"function_declaration":true,"indentation_type":true,"line_ending":true,"lowercase_keywords":true,"method_argument_space":{"on_multiline":"ensure_fully_multiline"},"no_break_comment":true,"no_closing_tag":true,"no_multiple_statements_per_line":true,"no_space_around_double_colon":true,"no_spaces_after_function_name":true,"no_trailing_whitespace":true,"no_trailing_whitespace_in_comment":true,"single_blank_line_at_eof":true,"single_class_element_per_statement":{"elements":["property"]},"single_line_after_imports":true,"spaces_inside_parentheses":true,"statement_indentation":true,"switch_case_semicolon_to_colon":true,"switch_case_space":true,"encoding":true,"full_opening_tag":true},"hashes":{"src\/Trait\/HasSelectorProperty.php":"8a2ea3e5316cac3317aeb4ab0c102d8e","src\/Trait\/HasAliasProperty.php":"e0a48b0bf89bfad94338a3453d
213482","src\/Trait\/HasSourceProperty.php":"10ca30063c79170babfbf9cf2b1115a6","src\/Trait\/HasNodeProperty.php":"2ad45bd8aad43e1161349f6424722dea","src\/Trait\/HasRawProperty.php":"1d2749898a1f5013414609c07645bd89","src\/Trait\/HasCallbackProperty.php":"753ed7db315135f29114ac0a44191f82","src\/Trait\/HasOperatorProperty.php":"2820034730238d984aa086c3ae3728b6","src\/RegisterAdapter.php":"2280c1e2da84ca7e5713513bd426f8a1","src\/CallbackAdapter.php":"86aa99d17a05f7d5330835351f40f995","src\/Source.php":"3ba79634b9d506c415718849d2d8af4c","src\/Adapter\/DefaultCallbackAdapter.php":"8ada7e634b6d02e864f6a96790f45b8d","src\/Adapter\/ClosureCallbackAdapter.php":"c7cb28c2115265e6bf5a1cc26178aa8a","src\/Adapter\/AttributeCallbackAdapter.php":"9cbf992f1d677c5d70d4efc1c2e11109","src\/Adapter\/UpperCallbackAdapter.php":"4cfed9bf9fc856345a62ed809b635a33","src\/Adapter\/LowerCallbackAdapter.php":"034176fe6cac587fb19d1d4580779aea","src\/Adapter\/ReverseCallbackAdapter.php":"9153744dee7eefcef28922f5143bea98","src\/Adapter\/AppendNodeCallbackAdapter.php":"afeb24e1702d7ce72f3d92c3b4413a28","src\/Adapter\/LengthCallbackAdapter.php":"3409544679e32175b096c43bdc429a10","src\/Adapter\/ReplaceCallbackAdapter.php":"03e54dbc90ff70f99aaae2ef92eef88e","src\/Definer.php":"605b9e4ba7118de6e50780b60d65b8ed","src\/Parser.php":"89b831205b2a2cde933bab662d583d48","src\/Loader.php":"2dc6678e10448d308c1d415ac281836d","src\/DefinerExtractor.php":"4bb1cd8f8cd040d5be918a6d8c0061cc","src\/Loader\/DOMCrawlerLoader.php":"8aba0161c65863a40637488d49ec7970","src\/CqueryException.php":"83c5a32d1b73811297b6fdb2d07183ec","src\/Cquery.php":"8cb1419150a9feb43f79fcf5f745a008","src\/Support\/Collection.php":"49e18301b12a058d4b8fc396ca47d404","src\/Support\/RegExp.php":"5739693554a6e3d8dc77e1c0c7f4a8eb","src\/Support\/Str.php":"51f52dfad7bb5f3bf683123498b4345d","src\/Filter.php":"f61b19efce20a7dbbc924c4d4bf544fa","tests\/FilterCqueryTest.php":"0e01f835fd15e1fe6ea62060278238dd","tests\/SampleTest.php":"0609dc3489806e9ddec1
209997e599ad","tests\/DefinerTest.php":"bc52332ba144ff4f2597749f8b67cac4","tests\/SourceTest.php":"d6c2f64db61d906c498519341e83ad9a","tests\/CadillacCarDatabaseTest.php":"5d142cebd6c3e6e7ffee039658aaf58c"}}
\ No newline at end of file
diff --git a/README.md b/README.md
index 7b0b7ba..cc86bcc 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ For example, you have a simple HTML element as shown below.
-### List function available
+### List of available definers
Below are the functions you are can use, they may change over time.
**Note:** nested function has been supported.
| function | example | description |
diff --git a/composer.json b/composer.json
index 7868dca..e05b5f8 100644
--- a/composer.json
+++ b/composer.json
@@ -36,7 +36,8 @@
"symfony/http-client": "^5.4|^6.3",
"symfony/deprecation-contracts": "^2.5|^3.4",
"symfony/dom-crawler": "5.4|^6.3",
- "doctrine/collections": "^1.8|^2.1|^3.0"
+ "doctrine/collections": "^1.8|^2.1|^3.0",
+ "cocur/slugify": "dev-main"
},
"require-dev": {
"phpunit/phpunit": "^8.0|^9.0|^10.0",
diff --git a/src/Cquery.php b/src/Cquery.php
index a81b73e..ab427e0 100644
--- a/src/Cquery.php
+++ b/src/Cquery.php
@@ -33,13 +33,20 @@ class Cquery
*/
private $loader;
+ /**
+ * A variable used to store the results of a query
+ *
+ * @var \Doctrine\Common\Collections\ArrayCollection
+ *
+ * The default value is null.
+ */
private $results;
/**
* Create a new Cquery instance.
*
- * @param \DOMNodeList|\DOMNode|string|null $source A source to use as the the source data, u can put html
- * content/url page to scrape default is null
+ * @param \DOMNodeList|\DOMNode|string|null $source The source data to use;
+ * you can pass HTML content or the URL of a page to scrape. Default is null.
*
* @param string $contentType Type of Data Content to be Used as Data Source default is 'html'
*/
@@ -58,32 +65,32 @@ public function __construct(string $source = null, $contentType = "html")
}
/**
- * Adds a definer to the current source.
- *
+ * Adds a source based on data given.
* This method is used to determine the HTML element selector
* that will serve as a property in each array element.
*
- * @param \Cacing69\Cquery\Definer|string $picks a selector to grab on element
+ * @param string $value Source element selector used to activate the query
* @return \Cacing69\Cquery\Cquery
- * @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect."
*/
- public function define(...$defines): Cquery
+ public function from(string $value)
{
- $this->loader->define(...$defines);
+ $this->loader->from($value);
return $this;
}
/**
- * Adds a source based on data given.
+ * Adds a definer to the current source.
+ *
* This method is used to determine the HTML element selector
* that will serve as a property in each array element.
*
- * @param string $value set a source element selector to activate query
+ * @param \Cacing69\Cquery\Definer|string ...$defines Selectors to grab on each element
* @return \Cacing69\Cquery\Cquery
+ * @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect.
*/
- public function from(string $value)
+ public function define(...$defines): Cquery
{
- $this->loader->from($value);
+ $this->loader->define(...$defines);
return $this;
}
@@ -102,7 +109,7 @@ public function limit(int $limit)
}
/**
- * Take a first reesult from result collection
+ * Take the first result from the result collection
*
* @return array
*/
@@ -147,7 +154,7 @@ public function orFilter($node, $operator = null, $value = null): Cquery
}
/**
- * Take a result from query
+ * Take the query results from the loader
*
* @return ArrayCollection
*/
@@ -188,38 +195,7 @@ public function getSource()
public function client($clientType)
{
- // $this->loader->setClientType($clientType);
-
+ $this->loader->setClientType($clientType);
return $this;
}
-
- public static function getAsync($results, $chunk)
- {
- $loop = Loop::get();
- $client = new Browser($loop);
- $results = array_chunk($results, 25);
-
- foreach ($results as $key => $_chunks) {
- foreach ($_chunks as $_key => $_result) {
- $client
- // ->withHeader("Key", "value")
- // ->withHeader("Key", "value")
- ->get($_result["url"])
- ->then(function (ResponseInterface $response) use (&$results, $key, $_key) {
- $detail = new Cquery((string) $response->getBody());
-
- $resultDetail = $detail
- ->from(".spec")
- ->define(
- ".specleft tr:nth-child(1) > td.data as price"
- )
- ->first();
- $results[$key][$_key]["price"] = $resultDetail["price"];
- });
- }
- $loop->run();
- }
-
- return array_merge(...$results);
- }
}
diff --git a/src/Loader.php b/src/Loader.php
index ff10abe..41579bf 100644
--- a/src/Loader.php
+++ b/src/Loader.php
@@ -16,8 +16,8 @@ abstract class Loader
{
use HasSourceProperty;
protected $limit = null;
- protected $clientType = "browser-kit";
protected $client;
+ protected $clientType = "browser-kit";
protected $uri = null;
protected $isRemote = false;
@@ -238,6 +238,12 @@ public function setCallbackCompose(Closure $closure)
return $this;
}
+ public function setClientType(string $clientType)
+ {
+ $this->clientType = $clientType;
+ return $this;
+ }
+
public function getResults()
{
return $this->results;
diff --git a/src/RegisterAdapter.php b/src/RegisterAdapter.php
index acaa146..b3813c4 100644
--- a/src/RegisterAdapter.php
+++ b/src/RegisterAdapter.php
@@ -11,6 +11,14 @@
use Cacing69\Cquery\Adapter\ReverseCallbackAdapter;
use Cacing69\Cquery\Adapter\UpperCallbackAdapter;
+/**
+ * RegisterAdapter registers the available adapters; these adapters are used when creating definers and filters.
+ *
+ * @author Ibnul Mutaki
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
class RegisterAdapter
{
public static function load()
diff --git a/src/Source.php b/src/Source.php
index 0f255bd..43494a5 100644
--- a/src/Source.php
+++ b/src/Source.php
@@ -9,6 +9,14 @@
use Cacing69\Cquery\Trait\HasRawProperty;
use Symfony\Component\CssSelector\CssSelectorConverter;
+/**
+ * Source class used to define the source element to be scraped
+ *
+ * @author Ibnul Mutaki
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
class Source
{
use HasAliasProperty;
diff --git a/src/Support/Str.php b/src/Support/Str.php
index 28270fe..8b08bef 100644
--- a/src/Support/Str.php
+++ b/src/Support/Str.php
@@ -2,72 +2,15 @@
namespace Cacing69\Cquery\Support;
+use Cocur\Slugify\Slugify;
+
class Str
{
- // https://stackoverflow.com/a/2955521/10232729
- public static function slug($text, $divider = "_"): string
+ public static function slug($text): string
{
- $replace = [
- '<' => '', '>' => '', '-' => ' ', '&' => '', '.' => '',
- '"' => '', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A',
- 'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ą' => 'A', 'Ă' => 'A', 'Æ' => 'Ae',
- 'Ç' => 'C', 'Ć' => 'C', 'Č' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C', 'Ď' => 'D', 'Đ' => 'D',
- 'Ð' => 'D', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E',
- 'Ę' => 'E', 'Ě' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ĝ' => 'G', 'Ğ' => 'G',
- 'Ġ' => 'G', 'Ģ' => 'G', 'Ĥ' => 'H', 'Ħ' => 'H', 'Ì' => 'I', 'Í' => 'I',
- 'Î' => 'I', 'Ï' => 'I', 'Ī' => 'I', 'Ĩ' => 'I', 'Ĭ' => 'I', 'Į' => 'I',
- 'İ' => 'I', 'IJ' => 'IJ', 'Ĵ' => 'J', 'Ķ' => 'K', 'Ł' => 'K', 'Ľ' => 'K',
- 'Ĺ' => 'K', 'Ļ' => 'K', 'Ŀ' => 'K', 'Ñ' => 'N', 'Ń' => 'N', 'Ň' => 'N',
- 'Ņ' => 'N', 'Ŋ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O',
- 'Ö' => 'Oe', 'Ø' => 'O', 'Ō' => 'O', 'Ő' => 'O', 'Ŏ' => 'O',
- 'Œ' => 'OE', 'Ŕ' => 'R', 'Ř' => 'R', 'Ŗ' => 'R', 'Ś' => 'S', 'Š' => 'S',
- 'Ş' => 'S', 'Ŝ' => 'S', 'Ș' => 'S', 'Ť' => 'T', 'Ţ' => 'T', 'Ŧ' => 'T',
- 'Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'Ue', 'Ū' => 'U',
- 'Ů' => 'U', 'Ű' => 'U', 'Ŭ' => 'U', 'Ũ' => 'U', 'Ų' => 'U',
- 'Ŵ' => 'W', 'Ý' => 'Y', 'Ŷ' => 'Y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'Ž' => 'Z',
- 'Ż' => 'Z', 'Þ' => 'T', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a',
- 'ä' => 'ae', 'å' => 'a', 'ā' => 'a', 'ą' => 'a', 'ă' => 'a',
- 'æ' => 'ae', 'ç' => 'c', 'ć' => 'c', 'č' => 'c', 'ĉ' => 'c', 'ċ' => 'c',
- 'ď' => 'd', 'đ' => 'd', 'ð' => 'd', 'è' => 'e', 'é' => 'e', 'ê' => 'e',
- 'ë' => 'e', 'ē' => 'e', 'ę' => 'e', 'ě' => 'e', 'ĕ' => 'e', 'ė' => 'e',
- 'ƒ' => 'f', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g', 'ģ' => 'g', 'ĥ' => 'h',
- 'ħ' => 'h', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ī' => 'i',
- 'ĩ' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'ij' => 'ij', 'ĵ' => 'j',
- 'ķ' => 'k', 'ĸ' => 'k', 'ł' => 'l', 'ľ' => 'l', 'ĺ' => 'l', 'ļ' => 'l',
- 'ŀ' => 'l', 'ñ' => 'n', 'ń' => 'n', 'ň' => 'n', 'ņ' => 'n', 'ʼn' => 'n',
- 'ŋ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'oe',
- 'ø' => 'o', 'ō' => 'o', 'ő' => 'o', 'ŏ' => 'o', 'œ' => 'oe',
- 'ŕ' => 'r', 'ř' => 'r', 'ŗ' => 'r', 'š' => 's', 'ù' => 'u', 'ú' => 'u',
- 'û' => 'u', 'ū' => 'u', 'ü' => 'ue', 'ů' => 'u', 'ű' => 'u',
- 'ŭ' => 'u', 'ũ' => 'u', 'ų' => 'u', 'ŵ' => 'w', 'ý' => 'y', 'ÿ' => 'y',
- 'ŷ' => 'y', 'ž' => 'z', 'ż' => 'z', 'ź' => 'z', 'þ' => 't', 'ß' => 'ss',
- 'ſ' => 'ss', 'ый' => 'iy', 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G',
- 'Д' => 'D', 'Е' => 'E', 'Ё' => 'YO', 'Ж' => 'ZH', 'З' => 'Z', 'И' => 'I',
- 'Й' => 'Y', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O',
- 'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F',
- 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'CH', 'Ш' => 'SH', 'Щ' => 'SCH', 'Ъ' => '',
- 'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'YU', 'Я' => 'YA', 'а' => 'a',
- 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo',
- 'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k', 'л' => 'l',
- 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r', 'с' => 's',
- 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c', 'ч' => 'ch',
- 'ш' => 'sh', 'щ' => 'sch', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e',
- 'ю' => 'yu', 'я' => 'ya'
- ];
-
- // make a human readable string
- $text = strtr($text, $replace);
-
- // replace non letter or digits by -
- $text = preg_replace('~[^\pL\d.]+~u', $divider, $text);
-
- // trim
- $text = trim($text, $divider);
-
- // remove unwanted characters
- $text = preg_replace('~[^-\w.]+~', '', $text);
+ $slugify = new Slugify(["separator" => "_"]);
- return strtolower($text);
+ return $slugify->slugify($text);
}
// https://stackoverflow.com/a/33546903