From 7b41a289abe30864df6686b243f4a3726234cc1c Mon Sep 17 00:00:00 2001
From: darodi <4682830+darodi@users.noreply.github.com>
Date: Tue, 16 Apr 2024 22:03:20 +0200
Subject: [PATCH] [WEBSITE - INA] Complete rewrite

---
 resources/lib/websites/ina.py | 335 ++++------------------------------
 1 file changed, 37 insertions(+), 298 deletions(-)

diff --git a/resources/lib/websites/ina.py b/resources/lib/websites/ina.py
index d07049f9c..33d4a7313 100644
--- a/resources/lib/websites/ina.py
+++ b/resources/lib/websites/ina.py
@@ -1,22 +1,20 @@
 # -*- coding: utf-8 -*-
-# Copyright: (c) 2017, SylvainCecchetto
+# Copyright: (c) 2017, SylvainCecchetto; 2024, darodi
 # GNU General Public License v2.0+ (see LICENSE.txt or https://www.gnu.org/licenses/gpl-2.0.txt)
 
 # This file is part of Catch-up TV & More
 
 from __future__ import unicode_literals
-import base64
-import json
+
 import re
-import string
-import xml.etree.ElementTree as ET
 
-from codequick import Listitem, Resolver, Route, utils
-import htmlement
 import urlquick
+# noinspection PyUnresolvedReferences
+from codequick import Listitem, Resolver, Route, utils
+# noinspection PyUnresolvedReferences
+from codequick.utils import urljoin_partial
 
-from resources.lib import download
-from resources.lib.menu_utils import item_post_treatment
+from resources.lib.web_utils import html_parser
 
 # TO DO
 # Add Premium Account (purchase an account to test)
@@ -24,308 +22,49 @@
 # Fix Info add Premium Content
 
 URL_ROOT = 'http://www.ina.fr'
+url_constructor = urljoin_partial(URL_ROOT)
 
-URL_STREAM = 'https://player.ina.fr/notices/%s'
-# VideoId
+ASSET_URL_PATTERN = re.compile(r'asset-details-url=\"(.*?)\"')
 
 
 @Route.register
 def website_root(plugin, **kwargs):
-    """Build root listing"""
-    categories = [
-        ('Thèmes', list_subcategories, 'themes', ''),
-        ('Toutes les Personnalités', list_alpha, '184', 'classic'),
-        ('Toutes les Émissions', list_alpha, '196', 'classic'),
-        ('Toutes les Séries', list_alpha, '196', 'series'),
-        ('Dossiers', list_subcategories, 'dossiers', '')
-    ]
-
-    for category in categories:
-        item = Listitem()
-        item.label = category[0]
-        item.set_callback(category[1], category[2], category[3])
-        item_post_treatment(item)
-        yield item
-
-    # Search videos
-    item = Listitem.search(list_types, URL_ROOT)
-    item_post_treatment(item)
-    yield item
-
-
-@Route.register
-def list_subcategories(plugin, subcategory, **kwargs):
-    """Build subcateory listing"""
-    ina_html = urlquick.get(URL_ROOT).text.encode('utf-8')
-    ina = htmlement.fromstring(ina_html)
-    if subcategory == 'themes':
-        ina = ina.find('.//div[@class="menusThemes"]')
-        for sub in ina.iterfind('.//a'):
-            url = sub.get('href')
-            label = sub.text.encode('utf-8')
-            if url[-1] != '/' or label == 'Voir tout':
-                continue
-            item = Listitem()
-            item.label = label
-            item.set_callback(list_subsubcategories, url=URL_ROOT + url)
-            yield item
-    elif subcategory == 'dossiers':
-        ina = ina.find('.//div[@class="secondary-nav__dossiers"]')
-        for sub in ina.iterfind('.//a'):
-            url = sub.get('href')
-            label = sub.text.encode('utf-8')
-            if url.count('/') != 3 or url[-1] != '/' or label == 'Voir tout':
-                continue
-            item = Listitem()
-            item.label = label
-            item.set_callback(list_types, url=URL_ROOT + url)
-            yield item
-
+    # TODO Search videos
 
-@Route.register
-def list_alpha(plugin, js_file, mode, **kwargs):
-    """Build alpha listing choice (A, B, C, ..."""
-    range_l = [
-        ('Toutes', 'Toutes'),
-        ('#', '')
-    ]
-    for letter in list(string.ascii_uppercase):
-        range_l.append((letter, letter))
-
-    for range_elt in range_l:
+    resp = urlquick.get(url_constructor("/ina-eclaire-actu"))
+    root_elem = resp.parse("div", attrs={"id": "block-hub-content"})
+    for url_tag in root_elem.iterfind(".//div[@class='gtm-print-list']"):
         item = Listitem()
-        item.label = range_elt[0]
-        item.set_callback(
-            list_alpha2,
-            js_file=js_file,
-            mode=mode,
-            range_elt=range_elt[1])
+        item.label = url_tag.findtext(".//h2[@class='title-bloc']")
+        carrousel = url_tag.find(".//div[@d-role='carrousel']")
+        item.set_callback(list_carousel, carrousel=carrousel)
         yield item
 
 
 @Route.register
-def list_alpha2(plugin, js_file, mode, range_elt, page=1, **kwargs):
-    """Build categories listing after range choice"""
-    params_l = [
-        'order=asc',
-        'page=' + str(page),
-        'nbResults=48',
-        'mode=' + mode,
-        'range=' + range_elt
-    ]
-
-    url = URL_ROOT + '/blocs/rubrique_sommaire/' + js_file \
-        + '?' + '&'.join(params_l)
-
-    list_categories_text = urlquick.get(url).text.encode('utf-8')
-    list_categories_json = json.loads(list_categories_text)
-    categories = htmlement.fromstring(list_categories_json["html"])
-    cnt = 0
-    for categroy in categories.iterfind(".//div[@class='media']"):
-        cnt = cnt + 1
-        item = Listitem()
-        item.label = categroy.find('.//img').get('alt')
-        item.art['thumb'] = item.art['landscape'] = URL_ROOT + \
-            categroy.find('.//img').get('src')
-        url = URL_ROOT + categroy.find('.//a').get('href')
-
-        item.set_callback(list_types,
-                          url=url)
-        item_post_treatment(item)
-        yield item
-
-    if cnt == 48:
-        # More categories...
-        yield Listitem.next_page(
-            js_file=js_file,
-            mode=mode,
-            range_elt=range_elt,
-            page=page + 1)
-    elif cnt == 0:
-        plugin.notify(plugin.localize(30718), '')
-        yield False
-
-
-@Route.register
-def list_subsubcategories(plugin, url, **kwargs):
-    """Build subsubcategories listing"""
-    sub_html = urlquick.get(url).text.encode('utf-8')
-    sub = htmlement.fromstring(sub_html)
-    sub = sub.find('.//section[@id="stackSousThemes"]')
-    for fig in sub.iterfind('.//figure'):
+def list_carousel(plugin, carrousel, **kwargs):
+    for link in carrousel.iterfind('.//a'):
         item = Listitem()
-        img = fig.find('.//img')
-        item.label = img.get('alt')
-        item.art['thumb'] = item.art['landscape'] = URL_ROOT + img.get('src')
-        url = fig.find('.//a').get('href')
-        item.set_callback(list_types, url=URL_ROOT + url)
+        item.label = link.findtext(".//div[@class='title-bloc-small']")
+        image = link.find(".//img")
+        if image is not None:  # a link without <img> must not abort the listing
+            item.art["thumb"] = image.get("data-src")
+        item.set_callback(play_video, url=url_constructor(link.get('href')))
         yield item
 
 
-@Route.register
-def list_types(plugin, url, search_query='', **kwargs):
-    """Build listing to choose contents type"""
-    # type=
-    content_types = [
-        ('Vidéos', 'video'),
-        ('Audios', 'audio'),
-        ('Pubs', 'pub')
-        # ('Dossiers', 'dossier')
-        # ('Créations Web', 'creationWeb')
-    ]
-    for content_type in content_types:
-        item = Listitem()
-        item.label = content_type[0]
-        item.set_callback(
-            list_sort,
-            url=url,
-            content_type=content_type[1],
-            search_query=search_query)
-        item_post_treatment(item)
-        yield item
-
-
-@Route.register
-def list_sort(plugin, url, content_type, search_query, **kwargs):
-    """Build listing to choose sort method"""
-    # s=
-    # sa=
-    sort_methods = [
-        ('Trier par : Pertinence', 'pertinence', 'desc'),
-        ('Trier par : Nombre de vues croissant', 'compteur_vue', 'asc'),
-        ('Trier par : Nombre de vues décroissant', 'compteur_vue', 'desc'),
-        ('Trier par : Date croissante', 'date_diffusion', 'asc'),
-        ('Trier par : Date décroissante', 'date_diffusion', 'desc'),
-        ('Trier par : Durée croissante', 'duree_totale', 'asc'),
-        ('Trier par : Durée décroissante', 'duree_totale', 'desc')
-    ]
-    for sort_method in sort_methods:
-        item = Listitem()
-        item.label = sort_method[0]
-        item.set_callback(
-            list_videos,
-            url=url,
-            content_type=content_type,
-            sort_method=sort_method[1],
-            sort_method_order=sort_method[2],
-            search_query=search_query)
-        item_post_treatment(item)
-        yield item
-
-
-@Route.register
-def list_videos(plugin, url, content_type, sort_method,
-                sort_method_order, search_query, start=0, **kwargs):
-    """Build videos listing"""
-    videos_html = urlquick.get(url).text
-    query = ''
-    must = ''
-    madelen_page = False
-    if search_query == '':
-        if 'executeQuery' in videos_html:
-            query = re.compile(r'OGP\.Search\.executeQuery(.*?);').findall(videos_html)[0]
-            if 'must' in query:
-                must = re.compile(r'must=(.*?)&').findall(query)[0]
-        else:
-            madelen_page = True
-
-    if madelen_page:
-        plugin.notify(plugin.localize(30712), '')
-        yield False
-    else:
-        query_l = [
-            'b=' + str(start),
-            'type=' + content_type,
-            'q=' + search_query,
-            's=' + sort_method,
-            'sa=' + sort_method_order,
-            'hf=48',
-            'must=' + must,
-            'block=true',
-            'target=www',
-            'resetParams=false'
-        ]
-
-        query_s = '%'.join(query_l)
-
-        videos_url = URL_ROOT + '/layout/set/ajax/recherche/result?' + base64.b64encode(query_s)
-        videos_html = urlquick.get(videos_url).text.encode('utf-8')
-        videos_html = videos_html.decode('unicode_escape')
-        videos_html = videos_html.replace('\\/', '/')
-        videos = htmlement.fromstring(videos_html)
-        cnt = 0
-        for episode in videos.iterfind(
-                ".//div[@class='media zoomarticle afficheNotices']"):
-            cnt = cnt + 1
-            item = Listitem()
-            item.label = 'No title'
-            if episode.find(".//div[@class='media-inapremium-slide']") is not None:
-                item.label = '[Ina Premium] ' + episode.find('.//img').get('alt')
-            else:
-                item.label = episode.find('.//img').get('alt')
-            try:
-                video_id = episode.find('.//a').get('href').split('/')[2]
-            except Exception:
-                continue
-            item.art['thumb'] = item.art['landscape'] = URL_ROOT + episode.find('.//img').get('src')
-            try:
-                video_duration_text_datas = episode.find(
-                    ".//span[@class='duration']").text.split(' ')
-                video_duration = 0
-                for video_duration_datas in video_duration_text_datas:
-                    if 's' in video_duration_datas:
-                        video_duration_datas = video_duration_datas.replace('s', '')
-                        video_duration = video_duration + int(video_duration_datas)
-                    elif 'm' in video_duration_datas:
-                        video_duration_datas = video_duration_datas.replace('m', '')
-                        video_duration = video_duration + (int(video_duration_datas) *
-                                                           60)
-                    elif 'h' in video_duration_datas:
-                        video_duration_datas = video_duration_datas.replace('h', '')
-                        video_duration = video_duration + (int(video_duration_datas) *
-                                                           3600)
-                item.info['duration'] = video_duration
-            except Exception:
-                pass
-
-            if episode.find(".//span[@class='broadcast']") is not None:
-                video_date = episode.find(".//span[@class='broadcast']").text
-                item.info.date(video_date, '%d/%m/%Y')
-
-            item.set_callback(get_video_url,
-                              video_id=video_id)
-            item_post_treatment(item, is_playable=True, is_downloadable=True)
-            yield item
-
-        if cnt == 48:
-            # More videos...
-            yield Listitem.next_page(
-                url=url,
-                content_type=content_type,
-                sort_method=sort_method,
-                sort_method_order=sort_method_order,
-                start=start + 48)
-        elif cnt == 0:
-            plugin.notify(plugin.localize(30718), '')
-            yield False
-
-
 @Resolver.register
-def get_video_url(plugin,
-                  video_id,
-                  download_mode=False,
-                  **kwargs):
-    """Get video URL and start video player"""
-    stream_xml = urlquick.get(URL_STREAM % video_id).text
-    stream_xml = utils.ensure_native_str(stream_xml)
-    stream_url = ''
-    xml_elements = ET.XML(stream_xml)
-    for item in xml_elements.findall('./channel/item'):
-        for child in item:
-            if child.tag == '{http://search.yahoo.com/mrss/}content':
-                stream_url = child.attrib['url']
-
-    if download_mode:
-        return download.download_video(stream_url)
-
-    return stream_url
+def play_video(plugin, url, **kwargs):
+    """Return the stream URL for the INA page at ``url``, or False."""
+    page = urlquick.get(url, max_age=-1)
+    match = ASSET_URL_PATTERN.search(page.text)
+    if match is None:
+        # No asset-details-url attribute on the page: nothing to resolve.
+        return False
+    # Undo HTML entity escaping of the attribute value before fetching it.
+    asset_url = html_parser.unescape(match.group(1))
+    details = urlquick.get(asset_url, max_age=-1).json()
+    if "resourceUrl" not in details:
+        return False
+    # TODO use resolver_proxy.get_stream_with_quality
+    return details["resourceUrl"]