Skip to content

Commit

Permalink
write some doc
Browse files Browse the repository at this point in the history
  • Loading branch information
cacing69 committed Sep 5, 2023
1 parent 304d7b1 commit 5839a73
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
composer.lock
run-cache.php
.phpunit.result.cache
.php-cs.fixer.cache
.php-cs-fixer.cache
phpunit.xml
.note.md
1 change: 0 additions & 1 deletion .php-cs-fixer.cache

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ For example, you have a simple HTML element as shown below.

</details>

### List function available
### List definer available

Below are the functions you can use; they may change over time. <br>**Note:** nested functions are supported.
| function | example | description |
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
"symfony/http-client": "^5.4|^6.3",
"symfony/deprecation-contracts": "^2.5|^3.4",
"symfony/dom-crawler": "5.4|^6.3",
"doctrine/collections": "^1.8|^2.1|^3.0"
"doctrine/collections": "^1.8|^2.1|^3.0",
"cocur/slugify": "dev-main"
},
"require-dev": {
"phpunit/phpunit": "^8.0|^9.0|^10.0",
Expand Down
68 changes: 22 additions & 46 deletions src/Cquery.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,20 @@ class Cquery
*/
private $loader;

/**
* A variable used to store the results of a query
*
* @var \Doctrine\Common\Collections\ArrayCollection
*
* The default results is null
*/
private $results;

/**
* Create a new Cquery instance.
*
* @param \DOMNodeList|\DOMNode|string|null $source A source to use as the the source data, u can put html
* content/url page to scrape default is null
* @param \DOMNodeList|\DOMNode|string|null $source The source data to query; you can pass
* HTML content or the URL of a page to scrape. Default is null.
*
* @param string $contentType Type of Data Content to be Used as Data Source default is 'html'
*/
Expand All @@ -58,32 +65,32 @@ public function __construct(string $source = null, $contentType = "html")
}

/**
* Adds a definer to the current source.
*
* Adds a source based on data given.
* This method is used to determine the HTML element selector
* that will serve as a property in each array element.
*
* @param \Cacing69\Cquery\Definer|string $picks a selector to grab on element
* @param string $value set a source element selector to activate query
* @return \Cacing69\Cquery\Cquery
* @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect.
*/
public function define(...$defines): Cquery
public function from(string $value)
{
$this->loader->define(...$defines);
$this->loader->from($value);
return $this;
}

/**
* Adds a source based on data given.
* Adds a definer to the current source.
*
* This method is used to determine the HTML element selector
* that will serve as a property in each array element.
*
* @param string $value set a source element selector to activate query
* @param \Cacing69\Cquery\Definer|string ...$defines selectors to grab on each element
* @return \Cacing69\Cquery\Cquery
* @throws \Cacing69\Cquery\CqueryException when the provided parameter is incorrect.
*/
public function from(string $value)
public function define(...$defines): Cquery
{
$this->loader->from($value);
$this->loader->define(...$defines);
return $this;
}

Expand All @@ -102,7 +109,7 @@ public function limit(int $limit)
}

/**
* Take a first reesult from result collection
* Take a first result from result collection
*
* @return array
*/
Expand Down Expand Up @@ -147,7 +154,7 @@ public function orFilter($node, $operator = null, $value = null): Cquery
}

/**
* Take a result from query
* Take a result query from loader
*
* @return ArrayCollection
*/
Expand Down Expand Up @@ -188,38 +195,7 @@ public function getSource()

/**
 * Select the client type the loader should use.
 *
 * The value is handed unchanged to the loader's setClientType().
 *
 * @param string $clientType identifier of the client to use
 * @return $this the current instance, so calls can be chained
 */
public function client($clientType)
{
    $this->loader->setClientType($clientType);

    return $this;
}

/**
 * Request the "url" of every row in batches and attach the scraped price.
 *
 * The rows are split into batches of $chunk entries. For each batch a GET
 * request is queued per row, then the event loop is run before the next
 * batch is queued. When a response arrives, its body is parsed with a new
 * Cquery instance and the text selected by
 * ".specleft tr:nth-child(1) > td.data" inside ".spec" is stored under the
 * row's "price" key.
 *
 * NOTE(review): Loop and Browser look like the ReactPHP event loop and HTTP
 * browser - confirm against the file's imports. Only a fulfilment callback
 * is attached, so a row whose request fails presumably keeps no "price" key.
 *
 * @param array $results rows to enrich; each row must contain a "url" key
 * @param int   $chunk   number of rows per batch; values that are not a
 *                       positive number fall back to 25
 * @return array the rows flattened back into a single list
 */
public static function getAsync($results, $chunk)
{
    // Nothing to fetch; this also avoids array_merge() being called with no arguments.
    if (empty($results)) {
        return [];
    }

    // Honour the caller-supplied batch size. It used to be ignored in favour
    // of a hard-coded 25, which is kept as the fallback for invalid values.
    $batchSize = (is_numeric($chunk) && (int) $chunk > 0) ? (int) $chunk : 25;

    $loop = Loop::get();
    $client = new Browser($loop);
    $results = array_chunk($results, $batchSize);

    foreach ($results as $key => $_chunks) {
        foreach ($_chunks as $_key => $_result) {
            $client
                ->get($_result["url"])
                ->then(function (ResponseInterface $response) use (&$results, $key, $_key) {
                    $detail = new Cquery((string) $response->getBody());

                    $resultDetail = $detail
                        ->from(".spec")
                        ->define(
                            ".specleft tr:nth-child(1) > td.data as price"
                        )
                        ->first();

                    // $results is captured by reference so the price lands in the caller-visible row.
                    $results[$key][$_key]["price"] = $resultDetail["price"];
                });
        }

        // Run the loop once per batch so only one batch of requests is queued at a time.
        $loop->run();
    }

    return array_merge(...$results);
}
}
8 changes: 7 additions & 1 deletion src/Loader.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ abstract class Loader
{
use HasSourceProperty;
protected $limit = null;
protected $clientType = "browser-kit";
protected $client;
protected $clientType = "browser-kit";

protected $uri = null;
protected $isRemote = false;
Expand Down Expand Up @@ -238,6 +238,12 @@ public function setCallbackCompose(Closure $closure)
return $this;
}

/**
* Set the client type used by this loader.
*
* @param string $clientType value stored in the $clientType property
* @return $this the same loader instance, so calls can be chained
*/
public function setClientType(string $clientType)
{
$this->clientType = $clientType;
return $this;
}

public function getResults()
{
return $this->results;
Expand Down
8 changes: 8 additions & 0 deletions src/RegisterAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
use Cacing69\Cquery\Adapter\ReverseCallbackAdapter;
use Cacing69\Cquery\Adapter\UpperCallbackAdapter;

/**
* RegisterAdapter is used to register the available adapters; these adapters are utilized when creating definers and filters.
*
* @author Ibnul Mutaki <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
class RegisterAdapter
{
public static function load()
Expand Down
8 changes: 8 additions & 0 deletions src/Source.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
use Cacing69\Cquery\Trait\HasRawProperty;
use Symfony\Component\CssSelector\CssSelectorConverter;

/**
* Source class used to define the source element to be scraped
*
* @author Ibnul Mutaki <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
class Source
{
use HasAliasProperty;
Expand Down
67 changes: 5 additions & 62 deletions src/Support/Str.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,72 +2,15 @@

namespace Cacing69\Cquery\Support;

use Cocur\Slugify\Slugify;

class Str
{
// https://stackoverflow.com/a/2955521/10232729
public static function slug($text, $divider = "_"): string
public static function slug($text): string
{
$replace = [
'<' => '', '>' => '', '-' => ' ', '&' => '', '.' => '',
'"' => '', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A',
'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ą' => 'A', 'Ă' => 'A', 'Æ' => 'Ae',
'Ç' => 'C', 'Ć' => 'C', 'Č' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C', 'Ď' => 'D', 'Đ' => 'D',
'Ð' => 'D', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E',
'Ę' => 'E', 'Ě' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ĝ' => 'G', 'Ğ' => 'G',
'Ġ' => 'G', 'Ģ' => 'G', 'Ĥ' => 'H', 'Ħ' => 'H', 'Ì' => 'I', 'Í' => 'I',
'Î' => 'I', 'Ï' => 'I', 'Ī' => 'I', 'Ĩ' => 'I', 'Ĭ' => 'I', 'Į' => 'I',
'İ' => 'I', 'IJ' => 'IJ', 'Ĵ' => 'J', 'Ķ' => 'K', 'Ł' => 'K', 'Ľ' => 'K',
'Ĺ' => 'K', 'Ļ' => 'K', 'Ŀ' => 'K', 'Ñ' => 'N', 'Ń' => 'N', 'Ň' => 'N',
'Ņ' => 'N', 'Ŋ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O',
'Ö' => 'Oe', 'Ø' => 'O', 'Ō' => 'O', 'Ő' => 'O', 'Ŏ' => 'O',
'Œ' => 'OE', 'Ŕ' => 'R', 'Ř' => 'R', 'Ŗ' => 'R', 'Ś' => 'S', 'Š' => 'S',
'Ş' => 'S', 'Ŝ' => 'S', 'Ș' => 'S', 'Ť' => 'T', 'Ţ' => 'T', 'Ŧ' => 'T',
'Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'Ue', 'Ū' => 'U',
'Ů' => 'U', 'Ű' => 'U', 'Ŭ' => 'U', 'Ũ' => 'U', 'Ų' => 'U',
'Ŵ' => 'W', 'Ý' => 'Y', 'Ŷ' => 'Y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'Ž' => 'Z',
'Ż' => 'Z', 'Þ' => 'T', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a',
'ä' => 'ae', 'å' => 'a', 'ā' => 'a', 'ą' => 'a', 'ă' => 'a',
'æ' => 'ae', 'ç' => 'c', 'ć' => 'c', 'č' => 'c', 'ĉ' => 'c', 'ċ' => 'c',
'ď' => 'd', 'đ' => 'd', 'ð' => 'd', 'è' => 'e', 'é' => 'e', 'ê' => 'e',
'ë' => 'e', 'ē' => 'e', 'ę' => 'e', 'ě' => 'e', 'ĕ' => 'e', 'ė' => 'e',
'ƒ' => 'f', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g', 'ģ' => 'g', 'ĥ' => 'h',
'ħ' => 'h', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ī' => 'i',
'ĩ' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'ij' => 'ij', 'ĵ' => 'j',
'ķ' => 'k', 'ĸ' => 'k', 'ł' => 'l', 'ľ' => 'l', 'ĺ' => 'l', 'ļ' => 'l',
'ŀ' => 'l', 'ñ' => 'n', 'ń' => 'n', 'ň' => 'n', 'ņ' => 'n', 'ʼn' => 'n',
'ŋ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'oe',
'ø' => 'o', 'ō' => 'o', 'ő' => 'o', 'ŏ' => 'o', 'œ' => 'oe',
'ŕ' => 'r', 'ř' => 'r', 'ŗ' => 'r', 'š' => 's', 'ù' => 'u', 'ú' => 'u',
'û' => 'u', 'ū' => 'u', 'ü' => 'ue', 'ů' => 'u', 'ű' => 'u',
'ŭ' => 'u', 'ũ' => 'u', 'ų' => 'u', 'ŵ' => 'w', 'ý' => 'y', 'ÿ' => 'y',
'ŷ' => 'y', 'ž' => 'z', 'ż' => 'z', 'ź' => 'z', 'þ' => 't', 'ß' => 'ss',
'ſ' => 'ss', 'ый' => 'iy', 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G',
'Д' => 'D', 'Е' => 'E', 'Ё' => 'YO', 'Ж' => 'ZH', 'З' => 'Z', 'И' => 'I',
'Й' => 'Y', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O',
'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F',
'Х' => 'H', 'Ц' => 'C', 'Ч' => 'CH', 'Ш' => 'SH', 'Щ' => 'SCH', 'Ъ' => '',
'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'YU', 'Я' => 'YA', 'а' => 'a',
'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo',
'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k', 'л' => 'l',
'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r', 'с' => 's',
'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c', 'ч' => 'ch',
'ш' => 'sh', 'щ' => 'sch', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e',
'ю' => 'yu', 'я' => 'ya'
];

// make a human readable string
$text = strtr($text, $replace);

// replace non letter or digits by -
$text = preg_replace('~[^\pL\d.]+~u', $divider, $text);

// trim
$text = trim($text, $divider);

// remove unwanted characters
$text = preg_replace('~[^-\w.]+~', '', $text);
$slugify = new Slugify(["separator" => "_"]);

return strtolower($text);
return $slugify->slugify($text);
}

// https://stackoverflow.com/a/33546903
Expand Down

0 comments on commit 5839a73

Please sign in to comment.