From ad58b72c07da852a01904c01b2648d4c803a24f1 Mon Sep 17 00:00:00 2001 From: laatonwalabhoot Date: Thu, 11 Jan 2024 03:16:18 +0530 Subject: [PATCH 1/2] feat: kmp --- crux-kmp/build.gradle.kts | 44 ++++ .../com/chimbori/crux_kmp/Platform.android.kt | 7 + .../kotlin/com/chimbori/crux_kmp/Crux.kt | 93 +++++++ .../kotlin/com/chimbori/crux_kmp/Platform.kt | 7 + .../com/chimbori/crux_kmp/api/Fields.kt | 39 +++ .../com/chimbori/crux_kmp/api/Plugins.kt | 39 +++ .../com/chimbori/crux_kmp/api/Resource.kt | 46 ++++ .../crux_kmp/common/HttpExtensions.kt | 101 ++++++++ .../crux_kmp/common/HttpUrlExtensions.kt | 231 ++++++++++++++++++ .../crux_kmp/common/JsoupExtensions.kt | 14 ++ .../com/chimbori/crux_kmp/common/Log.kt | 32 +++ .../crux_kmp/common/NumberExtensions.kt | 9 + .../crux_kmp/common/StringExtensions.kt | 43 ++++ .../crux_kmp/extractors/ImageUrlExtractor.kt | 52 ++++ .../crux_kmp/extractors/LinkUrlExtractor.kt | 29 +++ .../crux_kmp/extractors/MetadataHelpers.kt | 132 ++++++++++ .../crux_kmp/plugins/AmpRedirector.kt | 37 +++ .../crux_kmp/plugins/FacebookUrlRewriter.kt | 19 ++ .../crux_kmp/plugins/FaviconExtractor.kt | 29 +++ .../crux_kmp/plugins/GoogleUrlRewriter.kt | 21 ++ .../crux_kmp/plugins/HtmlMetadataExtractor.kt | 83 +++++++ .../plugins/TrackingParameterRemover.kt | 107 ++++++++ .../crux_kmp/plugins/WebAppManifestParser.kt | 78 ++++++ .../com/chimbori/crux_kmp/Platform.ios.kt | 9 + crux/.gitignore | 1 + build.gradle.kts => crux/build.gradle.kts | 2 + crux/consumer-rules.pro | 0 crux/proguard-rules.pro | 21 ++ .../main/kotlin/com/chimbori/crux/Crux.kt | 0 .../kotlin/com/chimbori/crux/api/Fields.kt | 0 .../kotlin/com/chimbori/crux/api/Plugins.kt | 0 .../kotlin/com/chimbori/crux/api/Resource.kt | 0 .../chimbori/crux/common/HttpUrlExtensions.kt | 0 .../chimbori/crux/common/JsoupExtensions.kt | 0 .../kotlin/com/chimbori/crux/common/Log.kt | 0 .../chimbori/crux/common/NumberExtensions.kt | 0 .../chimbori/crux/common/OkHttpExtensions.kt | 0 
.../chimbori/crux/common/StringExtensions.kt | 0 .../crux/extractors/ImageUrlExtractor.kt | 0 .../crux/extractors/LinkUrlExtractor.kt | 0 .../crux/extractors/MetadataHelpers.kt | 0 .../chimbori/crux/plugins/AmpRedirector.kt | 0 .../crux/plugins/FacebookUrlRewriter.kt | 0 .../chimbori/crux/plugins/FaviconExtractor.kt | 0 .../crux/plugins/GoogleUrlRewriter.kt | 0 .../crux/plugins/HtmlMetadataExtractor.kt | 0 .../crux/plugins/TrackingParameterRemover.kt | 0 .../crux/plugins/WebAppManifestParser.kt | 0 .../test/kotlin/com/chimbori/crux/CruxTest.kt | 0 .../com/chimbori/crux/api/ResourceTest.kt | 0 .../crux/common/HttpUrlExtensionsTest.kt | 0 .../crux/common/NumberExtensionsTest.kt | 0 .../crux/common/OkHttpExtensionsTest.kt | 0 .../crux/common/StringExtensionsTest.kt | 0 .../com/chimbori/crux/common/TestHelper.kt | 0 .../crux/extractors/ImageUrlExtractorTest.kt | 0 .../crux/extractors/LinkUrlExtractorTest.kt | 0 .../crux/extractors/MetadataHelpersTest.kt | 0 .../crux/plugins/AmpRedirectorTest.kt | 0 .../crux/plugins/FacebookUrlRewriterTest.kt | 0 .../crux/plugins/FaviconExtractorTest.kt | 0 .../crux/plugins/GoogleUrlRewriterTest.kt | 0 .../crux/plugins/HtmlMetadataExtractorTest.kt | 0 .../plugins/TrackingParameterRemoverTest.kt | 0 .../crux/plugins/WebAppManifestParserTest.kt | 0 .../chimbori/sample/KotlinPublicAPITest.kt | 0 gradle/libs.versions.toml | 24 +- settings.gradle.kts | 22 +- 68 files changed, 1369 insertions(+), 2 deletions(-) create mode 100644 crux-kmp/build.gradle.kts create mode 100644 crux-kmp/src/androidMain/kotlin/com/chimbori/crux_kmp/Platform.android.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Crux.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Platform.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Fields.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Plugins.kt create mode 100644 
crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Resource.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpExtensions.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpUrlExtensions.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/JsoupExtensions.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/Log.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/NumberExtensions.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/StringExtensions.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/ImageUrlExtractor.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/LinkUrlExtractor.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/MetadataHelpers.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/AmpRedirector.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FacebookUrlRewriter.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FaviconExtractor.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/GoogleUrlRewriter.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/HtmlMetadataExtractor.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/TrackingParameterRemover.kt create mode 100644 crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/WebAppManifestParser.kt create mode 100644 crux-kmp/src/iosMain/kotlin/com/chimbori/crux_kmp/Platform.ios.kt create mode 100644 crux/.gitignore rename build.gradle.kts => crux/build.gradle.kts (91%) create mode 100644 crux/consumer-rules.pro create mode 100644 crux/proguard-rules.pro rename {src => crux/src}/main/kotlin/com/chimbori/crux/Crux.kt 
(100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/api/Fields.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/api/Plugins.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/api/Resource.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/HttpUrlExtensions.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/JsoupExtensions.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/Log.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/NumberExtensions.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/OkHttpExtensions.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/common/StringExtensions.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/extractors/ImageUrlExtractor.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/extractors/LinkUrlExtractor.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/extractors/MetadataHelpers.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriter.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriter.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/TrackingParameterRemover.kt (100%) rename {src => crux/src}/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/CruxTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/api/ResourceTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/common/HttpUrlExtensionsTest.kt (100%) rename {src => 
crux/src}/test/kotlin/com/chimbori/crux/common/NumberExtensionsTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/common/OkHttpExtensionsTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/common/StringExtensionsTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/common/TestHelper.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/extractors/ImageUrlExtractorTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/extractors/LinkUrlExtractorTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/extractors/MetadataHelpersTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/AmpRedirectorTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriterTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/FaviconExtractorTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriterTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractorTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/TrackingParameterRemoverTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/crux/plugins/WebAppManifestParserTest.kt (100%) rename {src => crux/src}/test/kotlin/com/chimbori/sample/KotlinPublicAPITest.kt (100%) diff --git a/crux-kmp/build.gradle.kts b/crux-kmp/build.gradle.kts new file mode 100644 index 00000000..7cc17e06 --- /dev/null +++ b/crux-kmp/build.gradle.kts @@ -0,0 +1,44 @@ +plugins { + alias(libs.plugins.kotlinMultiplatform) + alias(libs.plugins.androidLibrary) +} + +kotlin { + androidTarget { + compilations.all { + kotlinOptions { + jvmTarget = "1.8" + } + } + } + + listOf( + iosX64(), + iosArm64(), + iosSimulatorArm64() + ).forEach { + it.binaries.framework { + baseName = "shared" + isStatic = true + } + } + + sourceSets { + commonMain.dependencies { + 
implementation(libs.ktor.client.core) + implementation(libs.ksoup) + implementation(libs.klaxon) + } + commonTest.dependencies { + implementation(libs.kotlin.test) + } + } +} + +android { + namespace = "com.chimbori.crux_kmp" + compileSdk = 34 + defaultConfig { + minSdk = 24 + } +} diff --git a/crux-kmp/src/androidMain/kotlin/com/chimbori/crux_kmp/Platform.android.kt b/crux-kmp/src/androidMain/kotlin/com/chimbori/crux_kmp/Platform.android.kt new file mode 100644 index 00000000..84b6cdde --- /dev/null +++ b/crux-kmp/src/androidMain/kotlin/com/chimbori/crux_kmp/Platform.android.kt @@ -0,0 +1,7 @@ +package com.chimbori.crux_kmp + +class AndroidPlatform : Platform { + override val name: String = "Android ${android.os.Build.VERSION.SDK_INT}" +} + +actual fun getPlatform(): Platform = AndroidPlatform() \ No newline at end of file diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Crux.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Crux.kt new file mode 100644 index 00000000..73f485a2 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Crux.kt @@ -0,0 +1,93 @@ +package com.chimbori.crux + +import com.chimbori.crux_kmp.api.Extractor +import com.chimbori.crux_kmp.api.Plugin +import com.chimbori.crux_kmp.api.Resource +import com.chimbori.crux_kmp.api.Rewriter +import com.chimbori.crux_kmp.common.CHROME_USER_AGENT +import com.chimbori.crux_kmp.plugins.AmpRedirector +import com.chimbori.crux_kmp.plugins.FacebookUrlRewriter +import com.chimbori.crux_kmp.plugins.FaviconExtractor +import com.chimbori.crux_kmp.plugins.GoogleUrlRewriter +import com.chimbori.crux_kmp.plugins.HtmlMetadataExtractor +import com.chimbori.crux_kmp.plugins.TrackingParameterRemover +import com.chimbori.crux_kmp.plugins.WebAppManifestParser +import com.fleeksoft.ksoup.nodes.Document +import io.ktor.client.HttpClient +import io.ktor.http.Url +import io.ktor.http.headers +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.IO +import 
kotlinx.coroutines.withContext + +/** + * An ordered list of default plugins configured in Crux. Callers can override and provide their own list, or pick and + * choose from the set of available default plugins to create their own configuration. + */ +public fun createDefaultPlugins(httpClient: HttpClient): List = listOf( + // Rewriters + + // Static redirectors go first, to avoid getting stuck into CAPTCHAs. + GoogleUrlRewriter(), + FacebookUrlRewriter(), + // Remove any tracking parameters remaining. + TrackingParameterRemover(), + + // Extractors + + // Parses many standard HTML metadata attributes. Fetches the Web page, so this must be the first [Extractor]. + HtmlMetadataExtractor(httpClient), + // Prefer canonical URLs over AMP URLs. + AmpRedirector(refetchContentFromCanonicalUrl = true, httpClient), + // Fetches and parses the Web Manifest. May replace existing favicon URL with one from the manifest.json. + WebAppManifestParser(httpClient), + // Extracts the best possible favicon from all the markup available on the page itself. + FaviconExtractor(), +) + +/** + * Crux can be configured with a set of plugins, including custom ones, in sequence. Each plugin can optionally process + * resource metadata, can make additional HTTP requests if necessary, and pass along updated metadata to the next plugin + * in the chain. + */ +public class Crux( + /** Select from available plugins, or provide custom plugins for Crux to use. */ + private val plugins: List? = null, + + /** If the calling app has its own instance of [HttpClient], use it, otherwise Crux can create and use its own. */ + httpClient: HttpClient = createCruxOkHttpClient(), +) { + + private val activePlugins: List = plugins ?: createDefaultPlugins(httpClient) + + /** + * Processes the provided URL, and returns a metadata object containing custom fields. + * @param originalUrl the URL to extract metadata and content from. 
+ * @param parsedDoc if the calling app already has access to a parsed DOM tree, Crux can reuse it instead of + * re-parsing it. If a custom [Document] is provided, Crux will not make any HTTP requests itself, and may not follow + * HTTP redirects (but plugins may still optionally make additional HTTP requests themselves.) + */ + public suspend fun extractFrom(originalUrl: Url, parsedDoc: Document? = null): Resource = + withContext(Dispatchers.IO) { + val rewrittenUrl = activePlugins + .filterIsInstance() + .fold(originalUrl) { rewrittenUrl, rewriter -> rewriter.rewrite(rewrittenUrl) } + + activePlugins + .filterIsInstance() + .fold(Resource(url = rewrittenUrl, document = parsedDoc)) { resource, extractor -> + if (extractor.canExtract(resource.url ?: rewrittenUrl)) { + resource + extractor.extract(resource) + } else { + resource + } + }.removeNullValues() + } +} + +internal fun createCruxOkHttpClient(): HttpClient = HttpClient { + followRedirects = true + headers { + append("User-Agent", CHROME_USER_AGENT) + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Platform.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Platform.kt new file mode 100644 index 00000000..4d9c2f92 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/Platform.kt @@ -0,0 +1,7 @@ +package com.chimbori.crux_kmp + +interface Platform { + val name: String +} + +expect fun getPlatform(): Platform \ No newline at end of file diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Fields.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Fields.kt new file mode 100644 index 00000000..85f8fd6a --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Fields.kt @@ -0,0 +1,39 @@ +package com.chimbori.crux_kmp.api + +/** Well-known keys to use in [Resource.metadata]. 
*/ +public object Fields { + public const val TITLE: String = "title" + public const val DESCRIPTION: String = "description" + public const val SITE_NAME: String = "site-name" + public const val LANGUAGE: String = "language" + public const val DISPLAY: String = "display" + public const val ORIENTATION: String = "orientation" + public const val PUBLISHED_AT: String = "published_at" + public const val MODIFIED_AT: String = "modified_at" + + public const val THEME_COLOR_HEX: String = "theme-color-hex" + public const val THEME_COLOR_HTML: String = "theme-color-html" // Named colors like "aliceblue" + public const val BACKGROUND_COLOR_HEX: String = "background-color-hex" + public const val BACKGROUND_COLOR_HTML: String = "background-color-html" // Named colors like "aliceblue" + + public const val CANONICAL_URL: String = "canonical-url" + public const val AMP_URL: String = "amp-url" + public const val FAVICON_URL: String = "favicon-url" + public const val BANNER_IMAGE_URL: String = "banner-image-url" + public const val FEED_URL: String = "feed-url" + public const val VIDEO_URL: String = "video-url" + public const val WEB_APP_MANIFEST_URL: String = "web-app-manifest-url" // https://www.w3.org/TR/appmanifest/ + public const val NEXT_PAGE_URL: String = "next-page-url" + public const val PREVIOUS_PAGE_URL: String = "previous-page-url" + + // For image or video resources only. + public const val ALT_TEXT: String = "alt-text" + public const val WIDTH_PX: String = "width-px" + public const val HEIGHT_PX: String = "height-px" + + // For articles (estimated reading time) and audio/video content (playback duration). 
+ public const val DURATION_MS: String = "duration-ms" + + public const val TWITTER_HANDLE: String = "twitter-handle" + public const val KEYWORDS_CSV: String = "keywords-csv" +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Plugins.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Plugins.kt new file mode 100644 index 00000000..24ffbb98 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Plugins.kt @@ -0,0 +1,39 @@ +package com.chimbori.crux_kmp.api + +import io.ktor.http.Url + +public sealed interface Plugin + +/** + * Rewriters are plugins that can modify the URL before it’s processed by other plugins. They should not have access + * to the network, and should execute quickly on the main thread if necessary. + */ +public fun interface Rewriter : Plugin { + public fun rewrite(url: Url): Url +} + +/** + * Crux is designed as a chain of plugins, each of which can optionally handle URLs passed to it. Each plugin is + * provided a fully-parsed HTML DOM to extract fields from, and can also make additional HTTP requests if necessary to + * retrieve additional metadata or to follow redirects. + * + * Metadata fields can be set via the [Resource.metadata] property. Plugins can also rewrite the canonical URL, and can + * provide an updated DOM tree if the canonical URL is changed. The updated URL and DOM tree will be passed on to the + * next plugin in sequence, so the exact ordering of plugins is important. + */ +public interface Extractor : Plugin { + /** + * @param url URL for the resource being processed by Crux. + * @return true if this plugin can handle the URL, false otherwise. Plugins can only inspect the [Url], without + * being able to peek at the content. + */ + public fun canExtract(url: Url): Boolean + + /** + * @param request metadata & DOM content for the request being handled. + * @return a partially populated [Resource] with newly-extracted fields. 
Include only those fields that need to be + * set or updated; they will be merged with the set of previously-extracted fields. If no fields need to be updated, + * return `null`. + */ + public suspend fun extract(request: Resource): Resource? +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Resource.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Resource.kt new file mode 100644 index 00000000..e4dadd69 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/api/Resource.kt @@ -0,0 +1,46 @@ +package com.chimbori.crux_kmp.api + +import com.fleeksoft.ksoup.nodes.Document +import com.fleeksoft.ksoup.nodes.Element +import io.ktor.http.Url + + +/** A [Resource] encapsulates metadata and content related to an HTTP resource. */ +public data class Resource( + /** Canonical URL for this resource. */ + val url: Url? = null, + + /** Parsed DOM tree for this resource, if available. */ + val document: Document? = null, + + /** + * Extracted and cleaned-up DOM tree for this resource, if available. + * If this is null, then article extraction has not been performed, or has failed. + */ + val article: Element? = null, + + /** A holder for any kind of custom objects that library users may want to use. */ + val metadata: Map = emptyMap(), +) { + /** @return value of a named field in [Resource.metadata]. */ + public operator fun get(key: String): Any? = metadata[key] + + /** + * Merges non-null fields from another [Resource] with this object, and returns a new immutable object. Prefer to use + * this operator instead of manually merging the two objects, so that all fields are correctly merged and not clobbered. 
+ */ + public operator fun plus(anotherResource: Resource?): Resource = Resource( + url = anotherResource?.url ?: url, + document = anotherResource?.document ?: document, + article = anotherResource?.article ?: article, + metadata = if (anotherResource?.metadata == null) metadata else metadata + anotherResource.metadata, + ) + + /** Returns an immutable copy of this [Resource] that only contains non-null values for each key in [metadata]. */ + public fun removeNullValues(): Resource = copy( + metadata = metadata.filterValues { it != null }, + ) + + /** For any potential extension functions to be defined on the companion object. */ + public companion object +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpExtensions.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpExtensions.kt new file mode 100644 index 00000000..c64262df --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpExtensions.kt @@ -0,0 +1,101 @@ +package com.chimbori.crux_kmp.common + +import com.chimbori.crux_kmp.api.Resource +import com.fleeksoft.ksoup.Ksoup +import com.fleeksoft.ksoup.nodes.Document +import com.fleeksoft.ksoup.ported.BufferReader +import io.ktor.client.HttpClient +import io.ktor.client.plugins.ResponseException +import io.ktor.client.request.HttpRequestBuilder +import io.ktor.client.request.request +import io.ktor.client.request.url +import io.ktor.client.statement.HttpResponse +import io.ktor.client.statement.readBytes +import io.ktor.client.statement.request +import io.ktor.http.HttpMethod +import io.ktor.http.HttpStatusCode +import io.ktor.http.Url +import io.ktor.utils.io.errors.IOException +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.IO +import kotlinx.coroutines.withContext + +private const val DEFAULT_BROWSER_VERSION = "100.0.0.0" + +internal const val CHROME_USER_AGENT = + "Mozilla/5.0 (Linux; Android 11; Build/RQ2A.210505.003) AppleWebKit/537.36 " + + "(KHTML, like Gecko) 
Version/4.0 Chrome/$DEFAULT_BROWSER_VERSION Mobile Safari/537.36" + +public suspend fun HttpClient.safeCall(builder: HttpRequestBuilder): HttpResponse? = + withContext(Dispatchers.IO) { + try { + this@safeCall.request(builder) + } catch (e: IOException) { + null + } catch (e: NullPointerException) { + // OkHttp sometimes tries to read a cookie which is null, causing an NPE here. The root cause + // has not been identified, but this only happens with Twitter so far. + null + } catch (e: IllegalArgumentException) { + // The URL is something like "https://" (no hostname, no path, etc.) which is clearly invalid. + null + } catch (e: ResponseException) { + // Device is offline, or this host is unreachable. + null + } catch (t: Throwable) { + // Something else really bad happened, e.g. [java.net.SocketTimeoutException]. + null + } + } + +public suspend fun HttpClient.safeHttpGet(url: Url): HttpResponse? { + val builder = HttpRequestBuilder() + builder.method = HttpMethod.Get + builder.url(url) + return safeCall(builder) +} + +public suspend fun HttpClient.safeHttpHead(url: Url): HttpResponse? { + val builder = HttpRequestBuilder() + builder.method = HttpMethod.Head + builder.url(url) + return safeCall(builder) +} + +public suspend fun HttpClient.httpGetContent( + url: Url, + onError: ((t: Throwable) -> Unit)? = null +): String? = + withContext(Dispatchers.IO) { + return@withContext safeHttpGet(url)?.use { response -> + if (response.status == HttpStatusCode.OK) { + try { + "" + } catch (t: Throwable) { + onError?.invoke(t) + "null" + } + } else "null" + } + } + +public suspend fun Resource.Companion.fetchFromUrl(url: Url, httpClient: HttpClient) + : Resource = withContext(Dispatchers.IO) { + + val httpResponse = httpClient.safeHttpGet(url) + + // If the HTTP request resulted in an HTTP redirect, use the redirected URL. 
+ val urlToUse = if (httpResponse?.status == HttpStatusCode.OK && httpResponse.request.url != url) { + httpResponse.request.url + } else url + + val docToUse: Document? = try { + httpResponse?.readBytes()?.let { + Ksoup.parse(BufferReader(it), "UTF-8", urlToUse.toString()) + } + } catch (t: Throwable) { + null + } + + Resource(url = urlToUse, document = docToUse) +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpUrlExtensions.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpUrlExtensions.kt new file mode 100644 index 00000000..9f966427 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/HttpUrlExtensions.kt @@ -0,0 +1,231 @@ +package com.chimbori.crux_kmp.common + +import io.ktor.http.Url + + +// Checks heuristically whether a given URL is likely to be an article, video, image, or other types. Can optionally +// resolve redirects such as when Facebook or Google show an interstitial page instead of redirecting the user to the +// actual URL. 
+ +public fun Url.isAdImage(): Boolean = toString().countMatches("ad") >= 2 + +public fun Url.isLikelyArticle(): Boolean = + !isLikelyImage() + && !isLikelyVideo() + && !isLikelyAudio() + && !isLikelyBinaryDocument() + && !isLikelyExecutable() + && !isLikelyArchive() + +public fun Url.isLikelyVideo(): Boolean = when (encodedPath.substringAfterLast(".").lowercase()) { + "3g2", + "3gp", + "amv", + "asf", + "avi", + "drc", + "flv", + "gif", + "gifv", + "m2v", + "m4p", + "m4v", + "mkv", + "mng", + "mov", + "mp2", + "mp4", + "mpe", + "mpeg", + "mpg", + "mpg4", + "mpv", + "mxf", + "nsv", + "ogg", + "ogv", + "qt", + "rm", + "rmvb", + "roq", + "svi", + "swf", + "viv", + "vob", + "webm", + "wmv", + "yuv", + -> true + else -> false +} + +public fun Url.isLikelyAudio(): Boolean = when (encodedPath.substringAfterLast(".").lowercase()) { + "3gp", + "8svx", + "aa", + "aac", + "aax", + "act", + "aiff", + "alac", + "amr", + "ape", + "au", + "awb", + "cda", + "dss", + "dvf", + "flac", + "gsm", + "iklax", + "ivs", + "m3u", + "m4a", + "m4b", + "m4p", + "mmf", + "mogg", + "mp3", + "mpc", + "msv", + "nmf", + "ogg", + "opus", + "raw", + "rf64", + "rm", + "sln", + "tta", + "voc", + "vox", + "wav", + "webm", + "wma", + "wv", + -> true + else -> false +} + +public fun Url.isLikelyImage(): Boolean = when (encodedPath.substringAfterLast(".").lowercase()) { + "ai", + "arw", + "bmp", + "cr2", + "dib", + "eps", + "gif", + "heic", + "heif", + "ico", + "ind", + "indd", + "indt", + "j2k", + "jfi", + "jfif", + "jif", + "jp2", + "jpe", + "jpeg", + "jpf", + "jpg", + "jpm", + "jpx", + "k25", + "mj2", + "nrw", + "pdf", + "png", + "psd", + "raw", + "svg", + "svgz", + "tif", + "tiff", + "webp", + -> true + else -> false +} + +public fun Url.isLikelyBinaryDocument(): Boolean = when (encodedPath.substringAfterLast(".").lowercase()) { + "doc", + "pdf", + "ppt", + "rtf", + "swf", + "xls", + -> true + else -> false +} + +public fun Url.isLikelyArchive(): Boolean = when 
(encodedPath.substringAfterLast(".").lowercase()) { + "7z", + "deb", + "gz", + "rar", + "rpm", + "tgz", + "zip", + -> true + else -> false +} + +public fun Url.isLikelyExecutable(): Boolean = when (encodedPath.substringAfterLast(".").lowercase()) { + "bat", + "bin", + "dmg", + "exe", + -> true + else -> false +} + +@Suppress("unused") +public fun Url.resolveRedirects(): Url { + var urlBeforeThisPass = this + var urlAfterThisPass = this + while (true) { // Go through redirectors multiple times while the URL is still being changed. + REDIRECTORS.forEach { redirector -> + if (redirector.matches(urlBeforeThisPass)) { + urlAfterThisPass = redirector.resolve(urlBeforeThisPass) + } + } + if (urlBeforeThisPass == urlAfterThisPass) { + return urlAfterThisPass + } else { + urlBeforeThisPass = urlAfterThisPass + } + } +} + +public fun String.toUrlOrNull(): Url? { + return try { + Url(this) + } catch (_: IllegalArgumentException) { + null + } +} + +private val REDIRECTORS = listOf( + object : RedirectPattern { // Facebook. + override fun matches(url: Url) = url.host.endsWith(".facebook.com") && url.encodedPath == "/l.php" + override fun resolve(url: Url) = url.parameters["u"]?.toUrlOrNull() + ?: url + }, + object : RedirectPattern { // Google. + override fun matches(url: Url) = url.host.endsWith(".google.com") && url.encodedPath == "/url" + override fun resolve(url: Url) = (url.parameters["q"] ?: url.parameters["url"])?.toUrlOrNull() + ?: url + } +) + +/** + * Defines a pattern used by a specific service for URL redirection. This should be stateless, and will be called for + * each URL that needs to be resolved. + */ +internal interface RedirectPattern { + /** @return true if this RedirectPattern can handle the provided URL, false if not. */ + fun matches(url: Url): Boolean + + /** @return the actual URL that is pointed to by this redirector URL. 
*/ + fun resolve(url: Url): Url +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/JsoupExtensions.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/JsoupExtensions.kt new file mode 100644 index 00000000..cf0e18ad --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/JsoupExtensions.kt @@ -0,0 +1,14 @@ +package com.chimbori.crux_kmp.common + +import com.fleeksoft.ksoup.nodes.Element +import com.fleeksoft.ksoup.select.Elements + +internal fun Element.parseAttrAsInt(attr: String) = try { + attr(attr).toInt() +} catch (e: NumberFormatException) { + 0 +} + +internal fun Elements.anyChildTagWithAttr(attribute: String): String? = + firstOrNull { element -> element.attr(attribute).isNotBlank() } + ?.attr(attribute) diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/Log.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/Log.kt new file mode 100644 index 00000000..5fd6968e --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/Log.kt @@ -0,0 +1,32 @@ +package com.chimbori.crux_kmp.common + +import com.fleeksoft.ksoup.nodes.Node + + +internal object Log { + private const val DEBUG = false + + private const val TRUNCATE = true + + fun i(message: String, vararg args: Any?) 
{ + if (DEBUG) { + System.err.println(String.format(message, *args)) + } + } + + fun i(reason: String, node: Node) { + if (DEBUG) { + val nodeToString = if (TRUNCATE) { + node.outerHtml().take(80).replace("\n", "") + } else { + "\n------\n${node.outerHtml()}\n------\n" + } + i("%s [%s]", reason, nodeToString) + } + } + + fun printAndRemove(reason: String, node: Node) { + i(reason, node) + node.remove() + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/NumberExtensions.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/NumberExtensions.kt new file mode 100644 index 00000000..263040dc --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/NumberExtensions.kt @@ -0,0 +1,9 @@ +package com.chimbori.crux_kmp.common + +import kotlin.math.ceil +import kotlin.math.roundToInt + +/** Cannot use [TimeUnit.MILLISECONDS.toMinutes()]; it rounds down, so anything under 1 min is reported as 0. */ +public fun Int?.millisecondsToMinutes(): Int = this?.let { milliseconds -> + ceil(milliseconds.toDouble() / 60_000).roundToInt() +} ?: 0 diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/StringExtensions.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/StringExtensions.kt new file mode 100644 index 00000000..934ddcc1 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/common/StringExtensions.kt @@ -0,0 +1,43 @@ +package com.chimbori.crux_kmp.common + +import kotlin.math.ceil +import kotlin.time.Duration +import kotlin.time.DurationUnit +import kotlin.time.ExperimentalTime + +internal fun String.countMatches(substring: String): Int { + var count = 0 + val indexOf = indexOf(substring) + if (indexOf >= 0) { + count++ + count += substring(indexOf + substring.length).countMatches(substring) + } + return count +} + +/** Remove more than two spaces or newlines */ +internal fun String.removeWhiteSpace() = replace("\\s+".toRegex(), " ").trim { it <= ' ' } + +internal fun 
String.countLetters() = count { it.isLetter() } + +public fun String.nullIfBlank(): String? = ifBlank { null } + +internal fun String.cleanTitle() = if (lastIndexOf("|") > length / 2) { + substring(0, indexOf("|")).trim() +} else { + removeWhiteSpace() +} + +@OptIn(ExperimentalTime::class) +public fun String.estimatedReadingTimeMs(): Int { + val wordCount = split("\\s+".toRegex()).size + return ((wordCount * Duration.convert(1.0, DurationUnit.MINUTES, DurationUnit.MILLISECONDS)) / AVERAGE_WORDS_PER_MINUTE).toInt() +} + +public fun String.estimatedReadingTimeMinutes(): Int { + val wordCount = split("\\s+".toRegex()).size + return ceil(wordCount.toDouble() / AVERAGE_WORDS_PER_MINUTE).toInt() // Divide as Double so ceil() rounds partial minutes up; Int division truncated first. +} + +/** Number of words that can be read by an average person in one minute. */ +internal const val AVERAGE_WORDS_PER_MINUTE = 275 diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/ImageUrlExtractor.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/ImageUrlExtractor.kt new file mode 100644 index 00000000..60ae610f --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/ImageUrlExtractor.kt @@ -0,0 +1,52 @@ +package com.chimbori.crux_kmp.extractors + +import com.chimbori.crux_kmp.common.anyChildTagWithAttr +import com.chimbori.crux_kmp.common.nullIfBlank +import com.fleeksoft.ksoup.nodes.Element +import com.fleeksoft.ksoup.parser.Parser.Companion.unescapeEntities +import com.fleeksoft.ksoup.select.Elements +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.takeFrom + +/** + * Given a single DOM Element root, this extractor inspects the sub-tree and returns the best possible image URL + * candidate available within it. The use case for this application is to pick a single representative image from a DOM + * sub-tree, in a way that works without explicit CSS selector foo. Check out the test cases for markup that is + * supported.
+ */ +@Suppress("unused") +public class ImageUrlExtractor(private val url: Url, private val root: Element) { + public var imageUrl: Url? = null + private set + + public fun findImage(): ImageUrlExtractor { + ( + root.attr("src").nullIfBlank() + ?: root.attr("data-src").nullIfBlank() + ?: root.select("img").anyChildTagWithAttr("src") + ?: root.select("img").anyChildTagWithAttr("data-src") + ?: root.select("*").anyChildTagWithAttr("src") + ?: root.select("*").anyChildTagWithAttr("data-src") + ?: parseImageUrlFromStyleAttr(root.select("[role=img]")) + ?: parseImageUrlFromStyleAttr(root.select("*")) + )?.let { imageUrl = URLBuilder(url).takeFrom(it).build() } + return this + } + + private fun parseImageUrlFromStyleAttr(elements: Elements): String? { + elements.forEach { element -> + var styleAttr = element.attr("style") + if (styleAttr.isEmpty()) { + return@forEach + } + styleAttr = unescapeEntities(styleAttr, true) + return CSS_URL.find(styleAttr)?.groupValues?.get(1) + } + return null + } + + public companion object { + private val CSS_URL = Regex("url\\([\\\"']{0,1}(.+?)[\\\"']{0,1}\\)") + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/LinkUrlExtractor.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/LinkUrlExtractor.kt new file mode 100644 index 00000000..9790338f --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/LinkUrlExtractor.kt @@ -0,0 +1,29 @@ +package com.chimbori.crux_kmp.extractors + +import com.chimbori.crux_kmp.common.anyChildTagWithAttr +import com.chimbori.crux_kmp.common.nullIfBlank +import com.fleeksoft.ksoup.nodes.Element +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.takeFrom + +/** + * Given a single DOM Element root, this extractor inspects the sub-tree and returns the best possible link URL + * available within it. 
The use case for this application is to pick a single representative link from a DOM sub-tree, + * in a way that works without explicit CSS selector foo. Check out the test cases for markup that is supported. + */ +@Suppress("unused") +public class LinkUrlExtractor(private val url: Url, private val root: Element) { + public var linkUrl: Url? = null + private set + + public fun findLink(): LinkUrlExtractor { + ( + root.attr("abs:href").nullIfBlank() + ?: root.select("*").anyChildTagWithAttr("href") + )?.let { + linkUrl = URLBuilder(url).takeFrom(it).build() + } + return this + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/MetadataHelpers.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/MetadataHelpers.kt new file mode 100644 index 00000000..e39873e5 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/extractors/MetadataHelpers.kt @@ -0,0 +1,132 @@ +package com.chimbori.crux_kmp.extractors + +import com.chimbori.crux_kmp.common.cleanTitle +import com.chimbori.crux_kmp.common.nullIfBlank +import com.chimbori.crux_kmp.common.removeWhiteSpace +import com.chimbori.crux_kmp.common.toUrlOrNull +import com.fleeksoft.ksoup.nodes.Document +import com.fleeksoft.ksoup.nodes.Element +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.copy +import io.ktor.http.encodedPath +import io.ktor.http.takeFrom + +public fun Document.extractTitle(): String? = ( + title().nullIfBlank() + ?: select("title").text().nullIfBlank() + ?: select("meta[name=title]").attr("content").nullIfBlank() + ?: select("meta[property=og:title]").attr("content").nullIfBlank() + ?: select("meta[name=twitter:title]").attr("content").nullIfBlank() + )?.cleanTitle()?.nullIfBlank() + +public fun Document.extractCanonicalUrl(): String? 
= ( + select("link[rel=canonical]").attr("abs:href").nullIfBlank() + ?: select("meta[property=og:url]").attr("content").nullIfBlank() + ?: select("meta[name=twitter:url]").attr("content").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + +public fun Document.extractPaginationUrl(baseUrl: Url?, nextOrPrev: String): Url? = ( + select("link[rel=$nextOrPrev]").attr("abs:href").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + ?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + +public fun Document.extractDescription(): String? = ( + select("meta[name=description]").attr("content").nullIfBlank() + ?: select("meta[property=og:description]").attr("content").nullIfBlank() + ?: select("meta[name=twitter:description]").attr("content").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + +public fun Document.extractSiteName(): String? = ( + select("meta[property=og:site_name]").attr("content").nullIfBlank() + ?: select("meta[name=application-name]").attr("content").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + +public fun Document.extractThemeColor(): String? = + select("meta[name=theme-color]").attr("content").nullIfBlank() + +public fun Document.extractPublishedAt(): String? = ( + select("meta[itemprop=dateCreated]").attr("content").nullIfBlank() + ?: select("meta[property=article:published_time]").attr("content").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + +public fun Document.extractModifiedAt(): String? = ( + select("meta[itemprop=dateModified]").attr("content").nullIfBlank() + ?: select("meta[property=article:modified_time]").attr("content").nullIfBlank() + )?.removeWhiteSpace()?.nullIfBlank() + +public fun Document.extractKeywords(): List<String> = + select("meta[name=keywords]").attr("content") + .removeWhiteSpace() + .removePrefix("[") + .removeSuffix("]") + .split("\\s*,\\s*".toRegex()) + .filter { it.isNotBlank() } + +public fun Document.extractFaviconUrl(baseUrl: Url?): Url?
{ + val allPossibleIconElements = listOf( + select("link[rel~=apple-touch-icon]"), + select("link[rel~=apple-touch-icon-precomposed]"), + select("link[rel~=icon]"), + select("link[rel~=ICON]"), + ) + return findLargestIcon(allPossibleIconElements.flatten()) + ?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + ?: baseUrl?.let { + URLBuilder(it).apply { + encodedPath = "/favicon.ico" + }.build() + } +} + +public fun Document.extractImageUrl(baseUrl: Url?): Url? = ( + // Twitter Cards and Open Graph images are usually higher quality, so rank them first. + select("meta[name=twitter:image]").attr("content").nullIfBlank() + ?: select("meta[property=og:image]").attr("content").nullIfBlank() + // image_src or thumbnails are usually low quality, so prioritize them *after* article images. + ?: select("link[rel=image_src]").attr("href").nullIfBlank() + ?: select("meta[name=thumbnail]").attr("content").nullIfBlank() + )?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + +public fun Document.extractFeedUrl(baseUrl: Url?): Url? = ( + select("link[rel=alternate]").select("link[type=application/rss+xml]").attr("href").nullIfBlank() + ?: select("link[rel=alternate]").select("link[type=application/atom+xml]").attr("href").nullIfBlank() + )?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + +public fun Document.extractAmpUrl(baseUrl: Url?): Url? = + select("link[rel=amphtml]").attr("href").nullIfBlank() + ?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + +public fun Document.extractVideoUrl(baseUrl: Url?): Url? 
= + select("meta[property=og:video]").attr("content").nullIfBlank() + ?.let { relativeUrl -> baseUrl?.let { URLBuilder(it).takeFrom(relativeUrl).build() } ?: relativeUrl.toUrlOrNull() } + +internal fun findLargestIcon(iconElements: List<Element>): String? = + iconElements.maxByOrNull { parseSize(it.attr("sizes")) }?.attr("abs:href")?.nullIfBlank() + +/** + * Given a size represented by "WidthxHeight" or "WidthxHeight ...", will return the largest dimension found. + * + * Examples: "128x128" will return 128. + * "128x64" will return 128. + * "24x24 48x48" will return 48. + * + * @param sizeString String representing the sizes. + * @return largest dimension, or 0 if input could not be parsed. + */ +internal fun parseSize(sizeString: String?): Int { + if (sizeString.isNullOrBlank()) return 0 + + val sizes = sizeString.trim(' ').lowercase() + return when { + // For multiple sizes in the same String, split and parse recursively. + sizes.contains(" ") -> sizes.split(" ").maxOfOrNull { parseSize(it) } ?: 0 + // For handling sizes of format 128x128 etc.
+ sizes.contains("x") -> try { + sizes.split("x").maxOf { it.trim().toInt() } + } catch (e: NumberFormatException) { + 0 + } + else -> 0 + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/AmpRedirector.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/AmpRedirector.kt new file mode 100644 index 00000000..137e28b0 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/AmpRedirector.kt @@ -0,0 +1,37 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Extractor +import com.chimbori.crux_kmp.api.Fields.CANONICAL_URL +import com.chimbori.crux_kmp.api.Resource +import com.chimbori.crux_kmp.common.fetchFromUrl +import com.chimbori.crux_kmp.common.isLikelyArticle +import com.chimbori.crux_kmp.common.nullIfBlank +import com.chimbori.crux_kmp.common.toUrlOrNull +import io.ktor.client.HttpClient +import io.ktor.http.Url + +/** + * If the current page is an AMP page, then [AmpRedirector] extracts the canonical URL & replaces the DOM tree for the AMP + * page with the DOM tree for the canonical page. + */ +public class AmpRedirector( + private val refetchContentFromCanonicalUrl: Boolean, + private val httpClient: HttpClient +) : Extractor { + /** Skip handling any file extensions that are unlikely to be an HTML page. */ + override fun canExtract(url: Url): Boolean = url.isLikelyArticle() + + override suspend fun extract(request: Resource): Resource? { + request.document?.select("link[rel=canonical]")?.attr("abs:href")?.nullIfBlank()?.let { + val canonicalUrl = it.toUrlOrNull() + if (canonicalUrl != request.url) { // Only redirect if this is not already the canonical URL. 
+ return if (refetchContentFromCanonicalUrl && canonicalUrl != null) { + Resource.fetchFromUrl(url = canonicalUrl, okHttpClient = httpClient) + } else { + Resource(url = canonicalUrl, metadata = mapOf(CANONICAL_URL to canonicalUrl)) + } + } + } + return null + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FacebookUrlRewriter.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FacebookUrlRewriter.kt new file mode 100644 index 00000000..c1b7ba53 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FacebookUrlRewriter.kt @@ -0,0 +1,19 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Rewriter +import com.chimbori.crux_kmp.common.toUrlOrNull +import io.ktor.http.Url + +public class FacebookUrlRewriter : Rewriter { + private fun canRewrite(url: Url) = url.host.endsWith(".facebook.com") && url.encodedPath == "/l.php" + + override fun rewrite(url: Url): Url { + if (!canRewrite(url)) return url + + var outputUrl: Url = url + do { + outputUrl = outputUrl.parameters["u"]?.toUrlOrNull() ?: outputUrl + } while (canRewrite(outputUrl)) + return outputUrl + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FaviconExtractor.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FaviconExtractor.kt new file mode 100644 index 00000000..becf0eec --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/FaviconExtractor.kt @@ -0,0 +1,29 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Extractor +import com.chimbori.crux_kmp.api.Fields.FAVICON_URL +import com.chimbori.crux_kmp.api.Resource +import com.chimbori.crux_kmp.common.isLikelyArticle +import com.chimbori.crux_kmp.extractors.extractCanonicalUrl +import com.chimbori.crux_kmp.extractors.extractFaviconUrl +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.takeFrom + +public class FaviconExtractor : Extractor { + 
/** Skip handling any file extensions that are unlikely to be HTML pages. */ + public override fun canExtract(url: Url): Boolean = url.isLikelyArticle() + + override suspend fun extract(request: Resource): Resource { + val canonicalUrl = request.document?.extractCanonicalUrl() + ?.let { request.url?.let { requestUrl -> URLBuilder(requestUrl).takeFrom(it).build() } } + ?: request.url + return Resource( + metadata = mapOf( + FAVICON_URL to request.document?.extractFaviconUrl( + canonicalUrl + ) + ) + ).removeNullValues() + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/GoogleUrlRewriter.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/GoogleUrlRewriter.kt new file mode 100644 index 00000000..50494863 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/GoogleUrlRewriter.kt @@ -0,0 +1,21 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Rewriter +import com.chimbori.crux_kmp.common.toUrlOrNull +import io.ktor.http.Url + +public class GoogleUrlRewriter : Rewriter { + private fun canRewrite(url: Url) = url.host.endsWith(".google.com") && url.encodedPath == "/url" + + override fun rewrite(url: Url): Url { + if (!canRewrite(url)) return url + + var outputUrl: Url = url + do { + outputUrl = (outputUrl.parameters["q"] ?: outputUrl.parameters["url"]) + ?.toUrlOrNull() + ?: outputUrl + } while (canRewrite(outputUrl)) + return outputUrl + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/HtmlMetadataExtractor.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/HtmlMetadataExtractor.kt new file mode 100644 index 00000000..e33990e4 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/HtmlMetadataExtractor.kt @@ -0,0 +1,83 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Extractor +import com.chimbori.crux_kmp.api.Fields.AMP_URL +import 
com.chimbori.crux_kmp.api.Fields.BANNER_IMAGE_URL +import com.chimbori.crux_kmp.api.Fields.CANONICAL_URL +import com.chimbori.crux_kmp.api.Fields.DESCRIPTION +import com.chimbori.crux_kmp.api.Fields.FEED_URL +import com.chimbori.crux_kmp.api.Fields.KEYWORDS_CSV +import com.chimbori.crux_kmp.api.Fields.MODIFIED_AT +import com.chimbori.crux_kmp.api.Fields.NEXT_PAGE_URL +import com.chimbori.crux_kmp.api.Fields.PREVIOUS_PAGE_URL +import com.chimbori.crux_kmp.api.Fields.PUBLISHED_AT +import com.chimbori.crux_kmp.api.Fields.SITE_NAME +import com.chimbori.crux_kmp.api.Fields.THEME_COLOR_HEX +import com.chimbori.crux_kmp.api.Fields.TITLE +import com.chimbori.crux_kmp.api.Fields.VIDEO_URL +import com.chimbori.crux_kmp.api.Resource +import com.chimbori.crux_kmp.common.fetchFromUrl +import com.chimbori.crux_kmp.common.isLikelyArticle +import com.chimbori.crux_kmp.extractors.extractAmpUrl +import com.chimbori.crux_kmp.extractors.extractCanonicalUrl +import com.chimbori.crux_kmp.extractors.extractDescription +import com.chimbori.crux_kmp.extractors.extractFeedUrl +import com.chimbori.crux_kmp.extractors.extractImageUrl +import com.chimbori.crux_kmp.extractors.extractKeywords +import com.chimbori.crux_kmp.extractors.extractModifiedAt +import com.chimbori.crux_kmp.extractors.extractPaginationUrl +import com.chimbori.crux_kmp.extractors.extractPublishedAt +import com.chimbori.crux_kmp.extractors.extractSiteName +import com.chimbori.crux_kmp.extractors.extractThemeColor +import com.chimbori.crux_kmp.extractors.extractTitle +import com.chimbori.crux_kmp.extractors.extractVideoUrl +import io.ktor.client.HttpClient +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.takeFrom + +/** + * Extracts common well-defined metadata fields from an HTML DOM tree. 
Includes support for: + * - Twitter Cards Metadata: https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/markup + * - Open Graph Protocol: https://ogp.me/ + * - AMP Spec: https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/ + */ +public class HtmlMetadataExtractor(private val httpClient: HttpClient) : Extractor { + /** Skip handling any file extensions that are unlikely to be HTML pages. */ + public override fun canExtract(url: Url): Boolean = url.isLikelyArticle() + + override suspend fun extract(request: Resource): Resource { + val resourceToUse = if (request.document != null) { + request + } else if (request.url != null) { + Resource.fetchFromUrl(request.url, httpClient) + } else { + Resource() + } + + val canonicalUrl = resourceToUse.document?.extractCanonicalUrl() + ?.let { resourceToUse.url?.let { resourceUrl -> URLBuilder(resourceUrl).takeFrom(it).build() } } + ?: resourceToUse.url + + return Resource( + url = canonicalUrl, + document = resourceToUse.document, + metadata = mapOf( + CANONICAL_URL to canonicalUrl, + TITLE to resourceToUse.document?.extractTitle(), + DESCRIPTION to resourceToUse.document?.extractDescription(), + SITE_NAME to resourceToUse.document?.extractSiteName(), + THEME_COLOR_HEX to resourceToUse.document?.extractThemeColor(), + PUBLISHED_AT to resourceToUse.document?.extractPublishedAt(), + MODIFIED_AT to resourceToUse.document?.extractModifiedAt(), + KEYWORDS_CSV to resourceToUse.document?.extractKeywords()?.joinToString(separator = ","), + NEXT_PAGE_URL to resourceToUse.document?.extractPaginationUrl(resourceToUse.url, "next"), + PREVIOUS_PAGE_URL to resourceToUse.document?.extractPaginationUrl(resourceToUse.url, "prev"), + BANNER_IMAGE_URL to resourceToUse.document?.extractImageUrl(canonicalUrl), + FEED_URL to resourceToUse.document?.extractFeedUrl(canonicalUrl), + AMP_URL to resourceToUse.document?.extractAmpUrl(canonicalUrl), + VIDEO_URL to resourceToUse.document?.extractVideoUrl(canonicalUrl), 
+ ) + ).removeNullValues() + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/TrackingParameterRemover.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/TrackingParameterRemover.kt new file mode 100644 index 00000000..3b0c87f2 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/TrackingParameterRemover.kt @@ -0,0 +1,107 @@ +package com.chimbori.crux_kmp.plugins + +import com.chimbori.crux_kmp.api.Rewriter +import io.ktor.http.URLBuilder +import io.ktor.http.Url + +public class TrackingParameterRemover(private val trackingParameters: Array<String> = TRACKING_PARAMETERS) : + Rewriter { + override fun rewrite(url: Url): Url = URLBuilder(url).apply { + this.parameters.names().filter { it in trackingParameters }.forEach { + removeAllQueryParameters(it) + } + }.build() + + public companion object { + public val TRACKING_PARAMETERS: Array<String> = arrayOf( + "__hsfp", + "__hssc", + "__hstc", + "__s", + "_hsenc", + "_hsmi", + "_openstat", + "action_object_map", + "action_ref_map", + "action_type_map", + "cvid", + "dclid", + "fb_action_ids", + "fb_action_types", + "fb_ref", + "fb_source", + "fbclid", + "ga_campaign", + "ga_content", + "ga_medium", + "ga_place", + "ga_source", + "ga_term", + "gbraid", + "gclid", + "gs_l", + "hsa_acc", + "hsa_ad", + "hsa_cam", + "hsa_grp", + "hsa_kw", + "hsa_mt", + "hsa_net", + "hsa_src", + "hsa_tgt", + "hsa_ver", + "hsCtaTracking", + "ICID", + "igshid", + "mc_cid", + "mc_eid", + "mkt_tok", + "ml_subscriber", + "ml_subscriber_hash", + "msclkid", + "oicd", + "oly_anon_id", + "oly_enc_id", + "otc", + "rb_clickid", + "s_cid", + "soc_src", + "soc_trk", + "stm_campaign", + "stm_cid", + "stm_content", + "stm_medium", + "stm_name", + "stm_reader", + "stm_referrer", + "stm_social", + "stm_social-type", + "stm_source", + "stm_term", + "twclid", + "utm_brand", + "utm_campaign", + "utm_cid", + "utm_content", + "utm_id", + "utm_medium", + "utm_name", + "utm_place", + "utm_pubreferrer", +
"utm_reader", + "utm_referrer", + "utm_social", + "utm_social-type", + "utm_source", + "utm_swu", + "utm_term", + "utm_userid", + "utm_viz_id", + "vero_conv", + "vero_id", + "wbraid", + "wickedid", + "yclid", + ) + } +} diff --git a/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/WebAppManifestParser.kt b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/WebAppManifestParser.kt new file mode 100644 index 00000000..75680977 --- /dev/null +++ b/crux-kmp/src/commonMain/kotlin/com/chimbori/crux_kmp/plugins/WebAppManifestParser.kt @@ -0,0 +1,78 @@ +package com.chimbori.crux_kmp.plugins + +import com.beust.klaxon.JsonArray +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import com.chimbori.crux_kmp.api.Extractor +import com.chimbori.crux_kmp.api.Fields.BACKGROUND_COLOR_HEX +import com.chimbori.crux_kmp.api.Fields.BACKGROUND_COLOR_HTML +import com.chimbori.crux_kmp.api.Fields.DISPLAY +import com.chimbori.crux_kmp.api.Fields.FAVICON_URL +import com.chimbori.crux_kmp.api.Fields.LANGUAGE +import com.chimbori.crux_kmp.api.Fields.ORIENTATION +import com.chimbori.crux_kmp.api.Fields.THEME_COLOR_HEX +import com.chimbori.crux_kmp.api.Fields.THEME_COLOR_HTML +import com.chimbori.crux_kmp.api.Fields.TITLE +import com.chimbori.crux_kmp.api.Fields.WEB_APP_MANIFEST_URL +import com.chimbori.crux_kmp.api.Resource +import com.chimbori.crux_kmp.common.httpGetContent +import com.chimbori.crux_kmp.common.isLikelyArticle +import com.chimbori.crux_kmp.common.nullIfBlank +import com.chimbori.crux_kmp.common.toUrlOrNull +import com.chimbori.crux_kmp.extractors.extractCanonicalUrl +import com.chimbori.crux_kmp.extractors.parseSize +import io.ktor.client.HttpClient +import io.ktor.http.URLBuilder +import io.ktor.http.Url +import io.ktor.http.takeFrom + +public class WebAppManifestParser(private val httpClient: HttpClient) : Extractor { + override fun canExtract(url: Url): Boolean = url.isLikelyArticle() + + override suspend fun extract(request: 
Resource): Resource? { + val canonicalUrl = request.document?.extractCanonicalUrl() + ?.let { request.url?.let { url -> URLBuilder(url).takeFrom(it).build() } } + ?: request.url + val webAppManifestUrl = + request.document?.select("link[rel=manifest]")?.attr("abs:href")?.nullIfBlank() + ?.let { canonicalUrl?.let { url -> URLBuilder(url).takeFrom(it).build() } ?: it.toUrlOrNull() } + ?: return null + + val manifest: JsonObject? = httpClient.httpGetContent(webAppManifestUrl)?.let { rawJSON -> + try { + Parser.default().parse(StringBuilder(rawJSON)) as JsonObject + } catch (t: Throwable) { + // Silently ignore all JSON errors, since they are not recoverable. + null + } + } + + val themeColorHtml = manifest.element("theme_color") + val backgroundColorHtml = manifest.element("background_color") + return Resource( + metadata = mapOf( + WEB_APP_MANIFEST_URL to webAppManifestUrl, + TITLE to manifest.element("name"), + LANGUAGE to manifest.element("lang"), + DISPLAY to manifest.element("display"), + ORIENTATION to manifest.element("orientation"), + FAVICON_URL to getLargestIconUrl( + webAppManifestUrl, + manifest?.array("icons") + ), + (if (themeColorHtml?.startsWith("#") == true) THEME_COLOR_HEX else THEME_COLOR_HTML) to themeColorHtml, + (if (backgroundColorHtml?.startsWith("#") == true) BACKGROUND_COLOR_HEX else BACKGROUND_COLOR_HTML) to backgroundColorHtml, + ) + ).removeNullValues() + } + + private fun getLargestIconUrl(baseUrl: Url?, icons: JsonArray<*>?): Url? { + icons + ?.maxByOrNull { sizeElement -> parseSize((sizeElement as? JsonObject)?.string("sizes")) } + .let { iconElement -> (iconElement as? JsonObject)?.string("src") } + ?.let { iconUrl -> return if (baseUrl != null) URLBuilder(baseUrl).takeFrom(iconUrl).build() else iconUrl.toUrlOrNull() } + ?: return null + } + + private fun JsonObject?.element(name: String): String?
= this?.string(name)?.trim() +} diff --git a/crux-kmp/src/iosMain/kotlin/com/chimbori/crux_kmp/Platform.ios.kt b/crux-kmp/src/iosMain/kotlin/com/chimbori/crux_kmp/Platform.ios.kt new file mode 100644 index 00000000..b23fb673 --- /dev/null +++ b/crux-kmp/src/iosMain/kotlin/com/chimbori/crux_kmp/Platform.ios.kt @@ -0,0 +1,9 @@ +package com.chimbori.crux_kmp + +import platform.UIKit.UIDevice + +class IOSPlatform: Platform { + override val name: String = UIDevice.currentDevice.systemName() + " " + UIDevice.currentDevice.systemVersion +} + +actual fun getPlatform(): Platform = IOSPlatform() \ No newline at end of file diff --git a/crux/.gitignore b/crux/.gitignore new file mode 100644 index 00000000..42afabfd --- /dev/null +++ b/crux/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/build.gradle.kts b/crux/build.gradle.kts similarity index 91% rename from build.gradle.kts rename to crux/build.gradle.kts index b5508aa7..d9450dbf 100644 --- a/build.gradle.kts +++ b/crux/build.gradle.kts @@ -5,6 +5,8 @@ plugins { alias(libs.plugins.kotlin.jvm) alias(libs.plugins.ben.manes.versions) alias(libs.plugins.maven.publish) + alias(libs.plugins.androidLibrary) apply false + alias(libs.plugins.jetbrainsKotlinAndroid) apply false } repositories { diff --git a/crux/consumer-rules.pro b/crux/consumer-rules.pro new file mode 100644 index 00000000..e69de29b diff --git a/crux/proguard-rules.pro b/crux/proguard-rules.pro new file mode 100644 index 00000000..481bb434 --- /dev/null +++ b/crux/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/src/main/kotlin/com/chimbori/crux/Crux.kt b/crux/src/main/kotlin/com/chimbori/crux/Crux.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/Crux.kt rename to crux/src/main/kotlin/com/chimbori/crux/Crux.kt diff --git a/src/main/kotlin/com/chimbori/crux/api/Fields.kt b/crux/src/main/kotlin/com/chimbori/crux/api/Fields.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/api/Fields.kt rename to crux/src/main/kotlin/com/chimbori/crux/api/Fields.kt diff --git a/src/main/kotlin/com/chimbori/crux/api/Plugins.kt b/crux/src/main/kotlin/com/chimbori/crux/api/Plugins.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/api/Plugins.kt rename to crux/src/main/kotlin/com/chimbori/crux/api/Plugins.kt diff --git a/src/main/kotlin/com/chimbori/crux/api/Resource.kt b/crux/src/main/kotlin/com/chimbori/crux/api/Resource.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/api/Resource.kt rename to crux/src/main/kotlin/com/chimbori/crux/api/Resource.kt diff --git a/src/main/kotlin/com/chimbori/crux/common/HttpUrlExtensions.kt b/crux/src/main/kotlin/com/chimbori/crux/common/HttpUrlExtensions.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/HttpUrlExtensions.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/HttpUrlExtensions.kt diff 
--git a/src/main/kotlin/com/chimbori/crux/common/JsoupExtensions.kt b/crux/src/main/kotlin/com/chimbori/crux/common/JsoupExtensions.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/JsoupExtensions.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/JsoupExtensions.kt diff --git a/src/main/kotlin/com/chimbori/crux/common/Log.kt b/crux/src/main/kotlin/com/chimbori/crux/common/Log.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/Log.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/Log.kt diff --git a/src/main/kotlin/com/chimbori/crux/common/NumberExtensions.kt b/crux/src/main/kotlin/com/chimbori/crux/common/NumberExtensions.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/NumberExtensions.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/NumberExtensions.kt diff --git a/src/main/kotlin/com/chimbori/crux/common/OkHttpExtensions.kt b/crux/src/main/kotlin/com/chimbori/crux/common/OkHttpExtensions.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/OkHttpExtensions.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/OkHttpExtensions.kt diff --git a/src/main/kotlin/com/chimbori/crux/common/StringExtensions.kt b/crux/src/main/kotlin/com/chimbori/crux/common/StringExtensions.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/common/StringExtensions.kt rename to crux/src/main/kotlin/com/chimbori/crux/common/StringExtensions.kt diff --git a/src/main/kotlin/com/chimbori/crux/extractors/ImageUrlExtractor.kt b/crux/src/main/kotlin/com/chimbori/crux/extractors/ImageUrlExtractor.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/extractors/ImageUrlExtractor.kt rename to crux/src/main/kotlin/com/chimbori/crux/extractors/ImageUrlExtractor.kt diff --git a/src/main/kotlin/com/chimbori/crux/extractors/LinkUrlExtractor.kt b/crux/src/main/kotlin/com/chimbori/crux/extractors/LinkUrlExtractor.kt similarity 
index 100% rename from src/main/kotlin/com/chimbori/crux/extractors/LinkUrlExtractor.kt rename to crux/src/main/kotlin/com/chimbori/crux/extractors/LinkUrlExtractor.kt diff --git a/src/main/kotlin/com/chimbori/crux/extractors/MetadataHelpers.kt b/crux/src/main/kotlin/com/chimbori/crux/extractors/MetadataHelpers.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/extractors/MetadataHelpers.kt rename to crux/src/main/kotlin/com/chimbori/crux/extractors/MetadataHelpers.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriter.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriter.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriter.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriter.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriter.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriter.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriter.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriter.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt similarity index 100% rename from 
src/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/TrackingParameterRemover.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/TrackingParameterRemover.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/TrackingParameterRemover.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/TrackingParameterRemover.kt diff --git a/src/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt b/crux/src/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt similarity index 100% rename from src/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt rename to crux/src/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt diff --git a/src/test/kotlin/com/chimbori/crux/CruxTest.kt b/crux/src/test/kotlin/com/chimbori/crux/CruxTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/CruxTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/CruxTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/api/ResourceTest.kt b/crux/src/test/kotlin/com/chimbori/crux/api/ResourceTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/api/ResourceTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/api/ResourceTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/common/HttpUrlExtensionsTest.kt b/crux/src/test/kotlin/com/chimbori/crux/common/HttpUrlExtensionsTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/common/HttpUrlExtensionsTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/common/HttpUrlExtensionsTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/common/NumberExtensionsTest.kt b/crux/src/test/kotlin/com/chimbori/crux/common/NumberExtensionsTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/common/NumberExtensionsTest.kt rename to 
crux/src/test/kotlin/com/chimbori/crux/common/NumberExtensionsTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/common/OkHttpExtensionsTest.kt b/crux/src/test/kotlin/com/chimbori/crux/common/OkHttpExtensionsTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/common/OkHttpExtensionsTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/common/OkHttpExtensionsTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/common/StringExtensionsTest.kt b/crux/src/test/kotlin/com/chimbori/crux/common/StringExtensionsTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/common/StringExtensionsTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/common/StringExtensionsTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/common/TestHelper.kt b/crux/src/test/kotlin/com/chimbori/crux/common/TestHelper.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/common/TestHelper.kt rename to crux/src/test/kotlin/com/chimbori/crux/common/TestHelper.kt diff --git a/src/test/kotlin/com/chimbori/crux/extractors/ImageUrlExtractorTest.kt b/crux/src/test/kotlin/com/chimbori/crux/extractors/ImageUrlExtractorTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/extractors/ImageUrlExtractorTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/extractors/ImageUrlExtractorTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/extractors/LinkUrlExtractorTest.kt b/crux/src/test/kotlin/com/chimbori/crux/extractors/LinkUrlExtractorTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/extractors/LinkUrlExtractorTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/extractors/LinkUrlExtractorTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/extractors/MetadataHelpersTest.kt b/crux/src/test/kotlin/com/chimbori/crux/extractors/MetadataHelpersTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/extractors/MetadataHelpersTest.kt rename to 
crux/src/test/kotlin/com/chimbori/crux/extractors/MetadataHelpersTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/AmpRedirectorTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/AmpRedirectorTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/AmpRedirectorTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/AmpRedirectorTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriterTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriterTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriterTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/FacebookUrlRewriterTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/FaviconExtractorTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/FaviconExtractorTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/FaviconExtractorTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/FaviconExtractorTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriterTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriterTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriterTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/GoogleUrlRewriterTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractorTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractorTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractorTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractorTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/TrackingParameterRemoverTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/TrackingParameterRemoverTest.kt similarity index 100% rename from 
src/test/kotlin/com/chimbori/crux/plugins/TrackingParameterRemoverTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/TrackingParameterRemoverTest.kt diff --git a/src/test/kotlin/com/chimbori/crux/plugins/WebAppManifestParserTest.kt b/crux/src/test/kotlin/com/chimbori/crux/plugins/WebAppManifestParserTest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/crux/plugins/WebAppManifestParserTest.kt rename to crux/src/test/kotlin/com/chimbori/crux/plugins/WebAppManifestParserTest.kt diff --git a/src/test/kotlin/com/chimbori/sample/KotlinPublicAPITest.kt b/crux/src/test/kotlin/com/chimbori/sample/KotlinPublicAPITest.kt similarity index 100% rename from src/test/kotlin/com/chimbori/sample/KotlinPublicAPITest.kt rename to crux/src/test/kotlin/com/chimbori/sample/KotlinPublicAPITest.kt diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 14ed7314..4adc7cb4 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -2,18 +2,40 @@ kotlin = "1.9.0" coroutines = "1.7.2" okhttp = "4.11.0" +agp = "8.2.0" +kotlinVersion = "1.9.0" +runner = "1.0.2" +espressoCore = "3.0.2" +appcompatV7 = "28.0.0" +ksoup = "0.0.6" +ktor = "2.3.6" +klaxon = "5.6" [libraries] kotlin-plugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" } coroutines-core = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" } jsoup = "org.jsoup:jsoup:1.16.1" -klaxon = "com.beust:klaxon:5.6" okhttp = { module = "com.squareup.okhttp3:okhttp", version.ref = "okhttp" } junit = "junit:junit:4.13.2" okhttp-logging = { module = "com.squareup.okhttp3:logging-interceptor", version.ref = "okhttp" } okhttp-mockwebserver = { module = "com.squareup.okhttp3:mockwebserver", version.ref = "okhttp" } +runner = { group = "com.android.support.test", name = "runner", version.ref = "runner" } +espresso-core = { group = "com.android.support.test.espresso", name = "espresso-core", version.ref = "espressoCore" } 
+appcompat-v7 = { group = "com.android.support", name = "appcompat-v7", version.ref = "appcompatV7" } +ksoup = { module = "com.fleeksoft.ksoup:ksoup", version.ref = "ksoup" } +klaxon = { module = "com.beust:klaxon", version.ref = "klaxon" } +ktor-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor" } +ktor-client-logging = { module = "io.ktor:ktor-client-logging", version.ref = "ktor" } +ktor-client-darwin = { module = "io.ktor:ktor-client-darwin", version.ref = "ktor" } +ktor-client-mock = { module = "io.ktor:ktor-client-mock", version.ref = "ktor" } +ktor-client-okhttp = { module = "io.ktor:ktor-client-okhttp", version.ref = "ktor" } [plugins] kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" } ben-manes-versions = { id = "com.github.ben-manes.versions", version = "0.47.0" } maven-publish = { id = "com.vanniktech.maven.publish", version = "0.24.0" } +androidLibrary = { id = "com.android.library", version.ref = "agp" } +kotlinAndroid = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" } +kotlinMultiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } +kotlinCocoapods = { id = "org.jetbrains.kotlin.native.cocoapods", version.ref = "kotlin" } +jetbrainsKotlinAndroid = { id = "org.jetbrains.kotlin.android", version.ref = "kotlinVersion" } diff --git a/settings.gradle.kts b/settings.gradle.kts index fc80838d..6d24a62d 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1 +1,21 @@ -rootProject.name = "Crux" +pluginManagement { + repositories { + google() + gradlePluginPortal() + mavenCentral() + } +} + +dependencyResolutionManagement { + repositories { + google() + mavenCentral() + } +} + +rootProject.name = "crux" +include(":crux") +include(":crux-kmp") +include(":crux-kmp") +include(":crux-kmp") +include(":crux-kmp") From f549f9399a66a0284a71d49bae54784472bbcb0e Mon Sep 17 00:00:00 2001 From: Vaibhav2002 Date: Fri, 12 Jan 2024 14:25:15 +0530 Subject: [PATCH 2/2] fix: Gradle 
Sync Failing --- crux-kmp/build.gradle.kts | 9 ++++----- gradle/libs.versions.toml | 2 +- settings.gradle.kts | 3 --- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/crux-kmp/build.gradle.kts b/crux-kmp/build.gradle.kts index 7cc17e06..9e6a898a 100644 --- a/crux-kmp/build.gradle.kts +++ b/crux-kmp/build.gradle.kts @@ -11,7 +11,7 @@ kotlin { } } } - + listOf( iosX64(), iosArm64(), @@ -24,13 +24,12 @@ kotlin { } sourceSets { - commonMain.dependencies { + commonMain{ + dependencies { implementation(libs.ktor.client.core) implementation(libs.ksoup) implementation(libs.klaxon) - } - commonTest.dependencies { - implementation(libs.kotlin.test) + } } } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 4adc7cb4..583ed1d9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -9,7 +9,7 @@ espressoCore = "3.0.2" appcompatV7 = "28.0.0" ksoup = "0.0.6" ktor = "2.3.6" -klaxon = "5.6" +klaxon = "5.5" [libraries] kotlin-plugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" } diff --git a/settings.gradle.kts b/settings.gradle.kts index 6d24a62d..d68bfcf6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -16,6 +16,3 @@ dependencyResolutionManagement { rootProject.name = "crux" include(":crux") include(":crux-kmp") -include(":crux-kmp") -include(":crux-kmp") -include(":crux-kmp")