Skip to content

Commit

Permalink
Fix parsing imgur links, closes #17
Browse files: browse the repository at this point in the history
  • Loading branch information
Jan Henning Thorsen committed Sep 16, 2020
1 parent 459043e commit ef2012b
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 27 deletions.
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Revision history for perl distribution LinkEmbedder

1.13 Not Released
- Fix parsing imgur links #17

1.12 2020-03-24T12:12:10+0900
- Add support for reading X-Provider-Name from headers
- Will serve plain text as a paste
Expand Down
13 changes: 7 additions & 6 deletions lib/LinkEmbedder/Link.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ my %DOM_SEL = (
':desc' => ['meta[property="og:description"]', 'meta[name="twitter:description"]', 'meta[name="description"]'],
':image' => ['meta[property="og:image"]', 'meta[property="og:image:url"]', 'meta[name="twitter:image"]'],
':site_name' => ['meta[property="og:site_name"]', 'meta[property="twitter:site"]'],
':title' => ['meta[property="og:title"]', 'meta[name="twitter:title"]', 'title'],
':title' => ['meta[property="og:title"]', 'meta[name="twitter:title"]', 'title'],
);

my @JSON_ATTRS = (
Expand All @@ -33,15 +33,15 @@ has provider_name => sub {
return $name =~ /([^\.]+)\.(\w+)$/ ? ucfirst $1 : $name;
};

has provider_url => sub { $_[0]->url->host ? $_[0]->url->clone->path('/') : undef };
has template => sub { [__PACKAGE__, sprintf '%s.html.ep', $_[0]->type] };
has provider_url => sub { $_[0]->url->host ? $_[0]->url->clone->path('/') : undef };
has template => sub { [__PACKAGE__, sprintf '%s.html.ep', $_[0]->type] };
has thumbnail_height => undef;
has thumbnail_url => undef;
has thumbnail_width => undef;
has title => undef;
has type => 'link';
has ua => undef; # Mojo::UserAgent object
has url => sub { Mojo::URL->new }; # Mojo::URL
has ua => undef; # Mojo::UserAgent object
has url => sub { Mojo::URL->new }; # Mojo::URL
has version => '1.0';
has width => sub { $_[0]->type =~ /^photo|video$/ ? 0 : undef };

Expand Down Expand Up @@ -338,7 +338,8 @@ __DATA__
</div>
@@ photo.html.ep
<div class="le-<%= $l->type %> le-provider-<%= lc $l->provider_name %>">
<img src="<%= $l->url %>" alt="<%= $l->title %>">
% my $thumbnail_url = $l->thumbnail_url || $l->url;
<img src="<%= $thumbnail_url %>" alt="<%= $l->title %>">
</div>
@@ rich.html.ep
% if ($l->title) {
Expand Down
18 changes: 15 additions & 3 deletions lib/LinkEmbedder/Link/Imgur.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,27 @@ package LinkEmbedder::Link::Imgur;
use Mojo::Base 'LinkEmbedder::Link';

has provider_name => 'Imgur';
has provider_url => sub { Mojo::URL->new('https://imgur.com') };
has provider_url => sub { Mojo::URL->new('https://imgur.com') };

# Learn about an Imgur link, preferring the "/embed" variant of the page.
#
# Only URLs whose path consists of exactly one word-like segment (for example
# "/w3cmS") are rewritten to ".../embed". Every other URL is handed to the
# parent class' learn_p() with the original arguments.
#
# Returns whatever the parent learn_p() returns, or the result of chaining
# _learn() onto _get_p() for the embed URL (presumably a promise -- confirm
# against the parent class).
sub learn_p {
  my $self = shift;
  my $url  = $self->url;
  my $path = $url->path;

  # Either condition alone disqualifies the URL, hence "or" rather than "and".
  # The short-circuit also avoids matching against an undefined first segment
  # when the path is empty.
  return $self->SUPER::learn_p(@_) if @$path != 1 or $path->[0] !~ m!^\w+$!;

  # Append "embed" to a clone so the segment is not added to the link's own url.
  my $embed_url = $url->clone;
  push @{$embed_url->path}, 'embed';
  return $self->_get_p($embed_url)->then(sub { $self->_learn(shift) });
}

sub _learn_from_dom {
my ($self, $dom) = @_;
$self->SUPER::_learn_from_dom($dom);
$self->title('Attempt to sit still until cat decides to move. via #reddit') unless $self->title;

my $el = $dom->at('[name="twitter:image"]') or return;
my $el = $dom->at('img.post[src]') or return;
my $url = Mojo::URL->new($el->{src})->scheme('https');
$self->height(0)->width(0)->type('photo');
$self->url(Mojo::URL->new($el->{content}));
$self->thumbnail_url($url->to_string);
}

1;
36 changes: 18 additions & 18 deletions t/imgur.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,32 @@ plan skip_all => 'cpanm IO::Socket::SSL' unless LinkEmbedder::TLS;

my $embedder = LinkEmbedder->new(force_secure => 1);
my $link;

#$embedder->ua->transactor->name('Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/80.0');
$embedder->get_p('http://imgur.com/w3cmS')->then(sub { $link = shift })->wait; # exists since Jan 2, 2011
isa_ok($link, 'LinkEmbedder::Link::Imgur');
is_deeply $link->TO_JSON,
{
cache_age => 0,
height => 0,
html => photo_html(),
provider_name => 'Imgur',
provider_url => 'https://imgur.com',
thumbnail_height => 315,
thumbnail_url => 'https://i.imgur.com/w3cmS.png?fb',
thumbnail_width => 600,
title => 'Attempt to sit still until cat decides to move. via #reddit',
type => 'photo',
url => 'https://i.imgur.com/w3cmS.png',
version => '1.0',
width => 0,
cache_age => 0,
height => 0,
html => photo_html(),
provider_name => 'Imgur',
provider_url => 'https://imgur.com',
thumbnail_url => 'https://i.imgur.com/w3cmSl.png',
title => 'Attempt to sit still until cat decides to move. via #reddit',
type => 'photo',
url => 'https://imgur.com/w3cmS',
version => '1.0',
width => 0,
},
'json for imgur.com'
or note $link->_dump;

note 'Make sure that force_secure=1 works';
$embedder->get_p('http://imgur.com/w3cmS.png')->then(sub { $link = shift })->wait;
$embedder->get_p('http://imgur.com/w3cmSl.png')->then(sub { $link = shift })->wait;
isa_ok($link, 'LinkEmbedder::Link::Imgur');
my $html = photo_html();
$html =~ s!alt="[^"]+"!alt="w3cmS.png"!;
$html =~ s!alt="[^"]+"!alt="w3cmSl.png"!;
$html =~ s!i\.imgur\.com!imgur.com!;
is_deeply $link->TO_JSON,
{
Expand All @@ -41,9 +41,9 @@ is_deeply $link->TO_JSON,
html => $html,
provider_name => 'Imgur',
provider_url => 'https://imgur.com',
title => 'w3cmS.png',
title => 'w3cmSl.png',
type => 'photo',
url => 'https://imgur.com/w3cmS.png',
url => 'https://imgur.com/w3cmSl.png',
version => '1.0',
width => 0,
},
Expand All @@ -55,7 +55,7 @@ done_testing;
sub photo_html {
return <<'HERE';
<div class="le-photo le-provider-imgur">
<img src="https://i.imgur.com/w3cmS.png" alt="Attempt to sit still until cat decides to move. via #reddit">
<img src="https://i.imgur.com/w3cmSl.png" alt="Attempt to sit still until cat decides to move. via #reddit">
</div>
HERE
}

0 comments on commit ef2012b

Please sign in to comment.