diff --git a/.gitignore b/.gitignore index dd3eafe..cc1226f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ TwitterSourceConnector.properties *.iml /target/ .okhttpcache +.idea diff --git a/README.md b/README.md index 647f256..43eb8ce 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Introduction -This connector uses the twitter streaming api to listen for status update messages and -convert them to a Kafka Connect struct on the fly. The goal is to match as much of the -Twitter Status object as possible. +This connector uses the twitter streaming api to listen for status update messages and +convert them to a Kafka Connect struct on the fly. The goal is to match as much of the +Twitter Tweet object as possible. # Configuration @@ -14,270 +14,26 @@ This Twitter Source connector is used to pull data from Twitter in realtime. name=connector1 tasks.max=1 connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector - # Set these required values -twitter.oauth.accessTokenSecret= -process.deletes= -filter.keywords= -kafka.status.topic= -kafka.delete.topic= -twitter.oauth.consumerSecret= -twitter.oauth.accessToken= -twitter.oauth.consumerKey= +twitter.bearerToken= +kafka.tweets.topic= +# And optionally these values +filter.rule= +tweet.fields= ``` -| Name | Description | Type | Default | Valid Values | Importance | -|---------------------------------|---------------------------------------------------|----------|---------|--------------|------------| -| filter.keywords | Twitter keywords to filter for. | list | | | high | -| filter.userIds | Twitter user IDs to follow. | list | "" | | low | -| kafka.delete.topic | Kafka topic to write delete events to. | string | | | high | -| kafka.status.topic | Kafka topic to write the statuses to. | string | | | high | -| process.deletes | Should this connector process deletes. 
| boolean | | | high | -| twitter.oauth.accessToken | OAuth access token | password | | | high | -| twitter.oauth.accessTokenSecret | OAuth access token secret | password | | | high | -| twitter.oauth.consumerKey | OAuth consumer key | password | | | high | -| twitter.oauth.consumerSecret | OAuth consumer secret | password | | | high | -| twitter.debug | Flag to enable debug logging for the twitter api. | boolean | false | | low | - +| Name | Description | Type | +|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| twitter.bearerToken | OAuth2 Bearer token with at least Elevated Twitter API access level | password | +| kafka.tweets.topic | Kafka topic to write the tweets to. | string | +| filter.rule | Filtering rules (see https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/integrate/build-a-rule for details). | string | +| tweet.fields | Fields that will be returned for tweet. To fetch all fields, use: attachments,author_id,context_annotations,conversation_id,created_at,edit_controls,edit_history_tweet_ids,
entities,geo,id,in_reply_to_user_id,lang,possibly_sensitive,public_metrics,referenced_tweets,reply_settings,
source,text,withheld | string | # Schemas -## com.github.jcustenborder.kafka.connect.twitter.Place - -Returns the place attached to this status - -| Name | Optional | Schema | Default Value | Documentation | -|---------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| -| Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| StreetAddress | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| CountryCode | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| Id | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| Country | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| PlaceType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| FullName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | - -## com.github.jcustenborder.kafka.connect.twitter.GeoLocation - -Returns The location that this tweet refers to if available. 
- -| Name | Optional | Schema | Default Value | Documentation | -|-----------|----------|---------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------| -| Latitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the latitude of the geo location | -| Longitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the longitude of the geo location | - -## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice - -Message that is received when a status is deleted from Twitter. - -| Name | Optional | Schema | Default Value | Documentation | -|----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| -| StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | -| UserId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | - -## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey - -Key for a message that is received when a status is deleted from Twitter. - -| Name | Optional | Schema | Default Value | Documentation | -|----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| -| StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | - -## com.github.jcustenborder.kafka.connect.twitter.StatusKey - -Key for a twitter status. 
- -| Name | Optional | Schema | Default Value | Documentation | -|------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | - -## com.github.jcustenborder.kafka.connect.twitter.Status - -Twitter status message. - -| Name | Optional | Schema | Default Value | Documentation | -|----------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------| -| CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | Return the created_at | -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the status | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the status | -| Source | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the source | -| Truncated | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is truncated | -| InReplyToStatusId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_tostatus_id | -| InReplyToUserId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_user_id | -| InReplyToScreenName | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the in_reply_to_screen_name | -| GeoLocation | true | [com.github.jcustenborder.kafka.connect.twitter.GeoLocation](#com.github.jcustenborder.kafka.connect.twitter.GeoLocation) | | Returns The location that this tweet refers to if available. | -| Place | true | [com.github.jcustenborder.kafka.connect.twitter.Place](#com.github.jcustenborder.kafka.connect.twitter.Place) | | Returns the place attached to this status | -| Favorited | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is favorited | -| Retweeted | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is retweeted | -| FavoriteCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Indicates approximately how many times this Tweet has been "favorited" by Twitter users. | -| User | false | [com.github.jcustenborder.kafka.connect.twitter.User](#com.github.jcustenborder.kafka.connect.twitter.User) | | Return the user associated with the status. -This can be null if the instance is from User.getStatus(). | -| Retweet | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| Contributors | false | Array of [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns an array of contributors, or null if no contributor is associated with this status. | -| RetweetCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled. 
| -| RetweetedByMe | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| CurrentUserRetweetId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled. | -| PossiblySensitive | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the lang of the status text if available. | -| WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the tweet is withheld | -| HashtagEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.HashtagEntity](#com.github.jcustenborder.kafka.connect.twitter.HashtagEntity) | | Returns an array if hashtag mentioned in the tweet. | -| UserMentionEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity](#com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity) | | Returns an array of user mentions in the tweet. | -| MediaEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.MediaEntity](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity) | | Returns an array of MediaEntities if medias are available in the tweet. | -| SymbolEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.SymbolEntity](#com.github.jcustenborder.kafka.connect.twitter.SymbolEntity) | | Returns an array of SymbolEntities if medias are available in the tweet. 
| -| URLEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.URLEntity](#com.github.jcustenborder.kafka.connect.twitter.URLEntity) | | Returns an array if URLEntity mentioned in the tweet. | - -## com.github.jcustenborder.kafka.connect.twitter.User - -Return the user associated with the status. -This can be null if the instance is from User.getStatus(). - -| Name | Optional | Schema | Default Value | Documentation | -|--------------------------------|----------|----------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------| -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the user | -| Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name of the user | -| ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name of the user | -| Location | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the location of the user | -| Description | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the description of the user | -| ContributorsEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user is enabling contributors | -| ProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the profile image url of the user | -| BiggerProfileImageURL | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| MiniProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| OriginalProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| BiggerProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| MiniProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| OriginalProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| DefaultProfileImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not uploaded their own avatar | -| URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the url of the user | -| Protected | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the user status is protected | -| FollowersCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of followers | -| ProfileBackgroundColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileTextColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileLinkColor | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileSidebarFillColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileSidebarBorderColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileUseBackgroundImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| DefaultProfile | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not altered the theme or background | -| ShowAllInlineMedia | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| FriendsCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of users the user follows (AKA "followings") | -| CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | | -| FavouritesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| UtcOffset | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| TimeZone | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBackgroundImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBackgroundImageUrlHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerURL | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerIPadURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerIPadRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerMobileURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBannerMobileRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| ProfileBackgroundTiled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the preferred language of the user | -| StatusesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| GeoEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| Verified | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| Translator | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | -| ListedCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of public lists the user is listed on, or -1 if the count is unavailable. 
| -| FollowRequestSent | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Returns true if the authenticating user has requested to follow this user, otherwise false. | -| WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the user is withheld | - -## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant - -| Name | Optional | Schema | Default Value | Documentation | -|-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| -| Url | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| Bitrate | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| ContentType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | - -## com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size - -| Name | Optional | Schema | Default Value | Documentation | -|--------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| -| Resize | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| Width | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| Height | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | - -## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity - -| Name | Optional | Schema | Default Value | Documentation | 
-|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| -| VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | -| VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | | -| ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | -| Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. | -| MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. | -| Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | Returns size variations of the media. 
| -| MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. | -| URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | -| DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | - -## com.github.jcustenborder.kafka.connect.twitter.HashtagEntity - -| Name | Optional | Schema | Default Value | Documentation | -|-------|----------|-------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------| -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the hashtag without #. | -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the hashtag. 
| -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the hashtag. | - -## com.github.jcustenborder.kafka.connect.twitter.MediaEntity - -| Name | Optional | Schema | Default Value | Documentation | -|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | -| Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. | -| MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. | -| Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | | -| MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. 
| -| VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | -| VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | -| VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | Returns size variations of the media. | -| ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | -| URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | -| DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. 
| - -## com.github.jcustenborder.kafka.connect.twitter.SymbolEntity - -| Name | Optional | Schema | Default Value | Documentation | -|-------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------| -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the symbol. | -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the symbol. | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the entity | - -## com.github.jcustenborder.kafka.connect.twitter.URLEntity - -| Name | Optional | Schema | Default Value | Documentation | -|-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| -| URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | -| ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. 
| -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | -| DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | - -## com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity - -| Name | Optional | Schema | Default Value | Documentation | -|------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------------| -| Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name mentioned in the status. | -| Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the user id mentioned in the status. | -| Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. | -| ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. | -| Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the user mention. | -| End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the user mention. | - +The schema is almost the same as the `#/components/schemas/Tweet` JSON schema included in https://api.twitter.com/2/openapi.json - +the only difference is that it is translated to a Kafka Connect schema. 
See `com.github.jcustenborder.kafka.connect.twitter.TweetConverter` +for details. # Running in development diff --git a/pom.xml b/pom.xml index b4cc64a..a1c5427 100644 --- a/pom.xml +++ b/pom.xml @@ -30,6 +30,14 @@ Committer + + arkadius + Arek Burdach + https://github.com/arkadius + + Committer + + scm:git:https://github.com/jcustenborder/kafka-connect-twitter.git @@ -40,19 +48,17 @@ github https://github.com/jcustenborder/kafka-connect-twitter/issues - - 4.0.6 - - org.twitter4j - twitter4j-core - ${twitter4j.version} + com.twitter + twitter-api-java-sdk + 2.0.3 - org.twitter4j - twitter4j-stream - ${twitter4j.version} + io.confluent + kafka-connect-avro-converter + 7.3.0 + test @@ -75,6 +81,11 @@ Support provided through community involvement. + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.1.2 + diff --git a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverter.java b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverter.java deleted file mode 100644 index 7b05579..0000000 --- a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverter.java +++ /dev/null @@ -1,648 +0,0 @@ -/** - * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.github.jcustenborder.kafka.connect.twitter; - -import org.apache.kafka.connect.data.Schema; -import org.apache.kafka.connect.data.SchemaBuilder; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.data.Timestamp; -import twitter4j.ExtendedMediaEntity; -import twitter4j.GeoLocation; -import twitter4j.HashtagEntity; -import twitter4j.MediaEntity; -import twitter4j.Place; -import twitter4j.Status; -import twitter4j.StatusDeletionNotice; -import twitter4j.SymbolEntity; -import twitter4j.URLEntity; -import twitter4j.User; -import twitter4j.UserMentionEntity; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -public class StatusConverter { - - - public final static Schema PLACE_SCHEMA; - public final static Schema GEO_LOCATION_SCHEMA; - public static final Schema SCHEMA_STATUS_DELETION_NOTICE; - public static final Schema SCHEMA_STATUS_DELETION_NOTICE_KEY; - public static final Schema STATUS_SCHEMA_KEY; - public static final Schema STATUS_SCHEMA; - - public static final Schema USER_SCHEMA = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.User") - .doc("Return the user associated with the status. 
This can be null if the instance is from User.getStatus().") - .field("Id", SchemaBuilder.int64().doc("Returns the id of the user").optional().build()) - .field("Name", SchemaBuilder.string().doc("Returns the name of the user").optional().build()) - .field("ScreenName", SchemaBuilder.string().doc("Returns the screen name of the user").optional().build()) - .field("Location", SchemaBuilder.string().doc("Returns the location of the user").optional().build()) - .field("Description", SchemaBuilder.string().doc("Returns the description of the user").optional().build()) - .field("ContributorsEnabled", SchemaBuilder.bool().doc("Tests if the user is enabling contributors").optional().build()) - .field("ProfileImageURL", SchemaBuilder.string().doc("Returns the profile image url of the user").optional().build()) - .field("BiggerProfileImageURL", SchemaBuilder.string().optional().build()) - .field("MiniProfileImageURL", SchemaBuilder.string().optional().build()) - .field("OriginalProfileImageURL", SchemaBuilder.string().optional().build()) - .field("ProfileImageURLHttps", SchemaBuilder.string().optional().build()) - .field("BiggerProfileImageURLHttps", SchemaBuilder.string().optional().build()) - .field("MiniProfileImageURLHttps", SchemaBuilder.string().optional().build()) - .field("OriginalProfileImageURLHttps", SchemaBuilder.string().optional().build()) - .field("DefaultProfileImage", SchemaBuilder.bool().doc("Tests if the user has not uploaded their own avatar").optional().build()) - .field("URL", SchemaBuilder.string().doc("Returns the url of the user").optional().build()) - .field("Protected", SchemaBuilder.bool().doc("Test if the user status is protected").optional().build()) - .field("FollowersCount", SchemaBuilder.int32().doc("Returns the number of followers").optional().build()) - .field("ProfileBackgroundColor", SchemaBuilder.string().optional().build()) - .field("ProfileTextColor", SchemaBuilder.string().optional().build()) - .field("ProfileLinkColor", 
SchemaBuilder.string().optional().build()) - .field("ProfileSidebarFillColor", SchemaBuilder.string().optional().build()) - .field("ProfileSidebarBorderColor", SchemaBuilder.string().optional().build()) - .field("ProfileUseBackgroundImage", SchemaBuilder.bool().optional().build()) - .field("DefaultProfile", SchemaBuilder.bool().doc("Tests if the user has not altered the theme or background").optional().build()) - .field("ShowAllInlineMedia", SchemaBuilder.bool().optional().build()) - .field("FriendsCount", SchemaBuilder.int32().doc("Returns the number of users the user follows (AKA \"followings\")").optional().build()) - .field("CreatedAt", Timestamp.builder().optional().build()) - .field("FavouritesCount", SchemaBuilder.int32().optional().build()) - .field("UtcOffset", SchemaBuilder.int32().optional().build()) - .field("TimeZone", SchemaBuilder.string().optional().build()) - .field("ProfileBackgroundImageURL", SchemaBuilder.string().optional().build()) - .field("ProfileBackgroundImageUrlHttps", SchemaBuilder.string().optional().build()) - .field("ProfileBannerURL", SchemaBuilder.string().optional().build()) - .field("ProfileBannerRetinaURL", SchemaBuilder.string().optional().build()) - .field("ProfileBannerIPadURL", SchemaBuilder.string().optional().build()) - .field("ProfileBannerIPadRetinaURL", SchemaBuilder.string().optional().build()) - .field("ProfileBannerMobileURL", SchemaBuilder.string().optional().build()) - .field("ProfileBannerMobileRetinaURL", SchemaBuilder.string().optional().build()) - .field("ProfileBackgroundTiled", SchemaBuilder.bool().optional().build()) - .field("Lang", SchemaBuilder.string().doc("Returns the preferred language of the user").optional().build()) - .field("StatusesCount", SchemaBuilder.int32().optional().build()) - .field("GeoEnabled", SchemaBuilder.bool().optional().build()) - .field("Verified", SchemaBuilder.bool().optional().build()) - .field("Translator", SchemaBuilder.bool().optional().build()) - .field("ListedCount", 
SchemaBuilder.int32().doc("Returns the number of public lists the user is listed on, or -1 if the count is unavailable.").optional().build()) - .field("FollowRequestSent", SchemaBuilder.bool().doc("Returns true if the authenticating user has requested to follow this user, otherwise false.").optional().build()) - .field("WithheldInCountries", SchemaBuilder.array(Schema.STRING_SCHEMA).doc("Returns the list of country codes where the user is withheld").build()) - .build(); - - static { - PLACE_SCHEMA = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.Place") - .optional() - .doc("Returns the place attached to this status") - .field("Name", SchemaBuilder.string().optional().build()) - .field("StreetAddress", SchemaBuilder.string().optional().build()) - .field("CountryCode", SchemaBuilder.string().optional().build()) - .field("Id", SchemaBuilder.string().optional().build()) - .field("Country", SchemaBuilder.string().optional().build()) - .field("PlaceType", SchemaBuilder.string().optional().build()) - .field("URL", SchemaBuilder.string().optional().build()) - .field("FullName", SchemaBuilder.string().optional().build()) - .build(); - } - - static { - GEO_LOCATION_SCHEMA = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.GeoLocation") - .optional() - .doc("Returns The location that this tweet refers to if available.") - .field("Latitude", SchemaBuilder.float64().doc("returns the latitude of the geo location").build()) - .field("Longitude", SchemaBuilder.float64().doc("returns the longitude of the geo location").build()) - .build(); - } - - static { - STATUS_SCHEMA_KEY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.StatusKey") - .doc("Key for a twitter status.") - .field("Id", Schema.OPTIONAL_INT64_SCHEMA) - .build(); - } - - public static final Schema SCHEMA_MEDIA_ENTITY_VARIANT = SchemaBuilder.struct() - 
.name("com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant") - .doc("") - .field("Url", SchemaBuilder.string().optional().doc("").build()) - .field("Bitrate", SchemaBuilder.int32().optional().doc("").build()) - .field("ContentType", SchemaBuilder.string().optional().doc("").build()) - .build(); - public static final Schema SCHEMA_MEDIA_ENTITY_SIZE = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size") - .doc("") - .field("Resize", SchemaBuilder.int32().optional().doc("").build()) - .field("Width", SchemaBuilder.int32().optional().doc("").build()) - .field("Height", SchemaBuilder.int32().optional().doc("").build()) - .build(); - public static final Schema SCHEMA_EXTENDED_MEDIA_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity") - .doc("") - .field("VideoAspectRatioWidth", SchemaBuilder.int32().optional().doc("").build()) - .field("VideoAspectRatioHeight", SchemaBuilder.int32().optional().doc("").build()) - .field("VideoDurationMillis", SchemaBuilder.int64().optional().doc("").build()) - .field("VideoVariants", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY_VARIANT).optional().doc("").build()) - .field("ExtAltText", SchemaBuilder.string().optional().doc("").build()) - .field("Id", SchemaBuilder.int64().optional().doc("Returns the id of the media.").build()) - .field("Type", SchemaBuilder.string().optional().doc("Returns the media type photo, video, animated_gif.").build()) - .field("MediaURL", SchemaBuilder.string().optional().doc("Returns the media URL.").build()) - .field("Sizes", SchemaBuilder.map(Schema.INT32_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE).doc("Returns size variations of the media.").build()) - .field("MediaURLHttps", SchemaBuilder.string().optional().doc("Returns the media secure URL.").build()) - .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("Text", 
SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns the expanded URL if mentioned URL is shorten.").build()) - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) - .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) - .build(); - public static final Schema SCHEMA_HASHTAG_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.HashtagEntity") - .doc("") - .field("Text", SchemaBuilder.string().optional().doc("Returns the text of the hashtag without #.").build()) - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the hashtag.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the hashtag.").build()) - .build(); - public static final Schema SCHEMA_MEDIA_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.MediaEntity") - .doc("") - .field("Id", SchemaBuilder.int64().optional().doc("Returns the id of the media.").build()) - .field("Type", SchemaBuilder.string().optional().doc("Returns the media type photo, video, animated_gif.").build()) - .field("MediaURL", SchemaBuilder.string().optional().doc("Returns the media URL.").build()) - .field("Sizes", SchemaBuilder.map(Schema.INT32_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE)) - .field("MediaURLHttps", SchemaBuilder.string().optional().doc("Returns the media secure URL.").build()) - .field("VideoAspectRatioWidth", SchemaBuilder.int32().optional().doc("").build()) - .field("VideoAspectRatioHeight", SchemaBuilder.int32().optional().doc("").build()) - 
.field("VideoDurationMillis", SchemaBuilder.int64().optional().doc("").build()) - .field("VideoVariants", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY_VARIANT).optional().doc("Returns size variations of the media.").build()) - .field("ExtAltText", SchemaBuilder.string().optional().doc("").build()) - .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("Text", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns the expanded URL if mentioned URL is shorten.").build()) - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) - .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) - .build(); - public static final Schema SCHEMA_SYMBOL_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.SymbolEntity") - .doc("") - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the symbol.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the symbol.").build()) - .field("Text", SchemaBuilder.string().optional().doc("Returns the text of the entity").build()) - .build(); - public static final Schema SCHEMA_URL_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.URLEntity") - .doc("") - .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("Text", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) - .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns 
the expanded URL if mentioned URL is shorten.").build()) - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) - .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) - .build(); - public static final Schema SCHEMA_USER_MENTION_ENTITY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity") - .doc("") - .field("Name", SchemaBuilder.string().optional().doc("Returns the name mentioned in the status.").build()) - .field("Id", SchemaBuilder.int64().optional().doc("Returns the user id mentioned in the status.").build()) - .field("Text", SchemaBuilder.string().optional().doc("Returns the screen name mentioned in the status.").build()) - .field("ScreenName", SchemaBuilder.string().optional().doc("Returns the screen name mentioned in the status.").build()) - .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the user mention.").build()) - .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the user mention.").build()) - .build(); - - static { - STATUS_SCHEMA = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.Status") - .doc("Twitter status message.") - .field("CreatedAt", Timestamp.builder().doc("Return the created_at").optional().build()) - .field("Id", SchemaBuilder.int64().doc("Returns the id of the status").optional().build()) - .field("Text", SchemaBuilder.string().doc("Returns the text of the status").optional().build()) - .field("Source", SchemaBuilder.string().doc("Returns the source").optional().build()) - .field("Truncated", SchemaBuilder.bool().doc("Test if the status is truncated").optional().build()) - 
.field("InReplyToStatusId", SchemaBuilder.int64().doc("Returns the in_reply_tostatus_id").optional().build()) - .field("InReplyToUserId", SchemaBuilder.int64().doc("Returns the in_reply_user_id").optional().build()) - .field("InReplyToScreenName", SchemaBuilder.string().doc("Returns the in_reply_to_screen_name").optional().build()) - .field("GeoLocation", GEO_LOCATION_SCHEMA) - .field("Place", PLACE_SCHEMA) - .field("Favorited", SchemaBuilder.bool().doc("Test if the status is favorited").optional().build()) - .field("Retweeted", SchemaBuilder.bool().doc("Test if the status is retweeted").optional().build()) - .field("FavoriteCount", SchemaBuilder.int32().doc("Indicates approximately how many times this Tweet has been \"favorited\" by Twitter users.").optional().build()) - .field("User", USER_SCHEMA) - .field("Retweet", SchemaBuilder.bool().optional().build()) - .field("Contributors", SchemaBuilder.array(Schema.INT64_SCHEMA).doc("Returns an array of contributors, or null if no contributor is associated with this status.").build()) - .field("RetweetCount", SchemaBuilder.int32().doc("Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled.").optional().build()) - .field("RetweetedByMe", SchemaBuilder.bool().optional().build()) - .field("CurrentUserRetweetId", SchemaBuilder.int64().doc("Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled.").optional().build()) - .field("PossiblySensitive", SchemaBuilder.bool().optional().build()) - .field("Lang", SchemaBuilder.string().doc("Returns the lang of the status text if available.").optional().build()) - .field("WithheldInCountries", SchemaBuilder.array(Schema.STRING_SCHEMA).doc("Returns the list of country codes where the tweet is withheld").build()) - .field("HashtagEntities", SchemaBuilder.array(SCHEMA_HASHTAG_ENTITY).doc("Returns an array if hashtag mentioned in the 
tweet.").optional().build()) - .field("UserMentionEntities", SchemaBuilder.array(SCHEMA_USER_MENTION_ENTITY).doc("Returns an array of user mentions in the tweet.").optional().build()) - .field("MediaEntities", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY).doc("Returns an array of MediaEntities if medias are available in the tweet.").optional().build()) - .field("SymbolEntities", SchemaBuilder.array(SCHEMA_SYMBOL_ENTITY).doc("Returns an array of SymbolEntities if medias are available in the tweet.").optional().build()) - .field("URLEntities", SchemaBuilder.array(SCHEMA_URL_ENTITY).doc("Returns an array if URLEntity mentioned in the tweet.").optional().build()) - - .build(); - } - - static { - SCHEMA_STATUS_DELETION_NOTICE = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice") - .doc("Message that is received when a status is deleted from Twitter.") - .field("StatusId", Schema.INT64_SCHEMA) - .field("UserId", Schema.INT64_SCHEMA) - .build(); - } - - static { - SCHEMA_STATUS_DELETION_NOTICE_KEY = SchemaBuilder.struct() - .name("com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey") - .doc("Key for a message that is received when a status is deleted from Twitter.") - .field("StatusId", Schema.INT64_SCHEMA) - .build(); - } - - static Map convertSizes(Map items) { - Map results = new LinkedHashMap<>(); - - if (items == null) { - return results; - } - - for (Map.Entry kvp : items.entrySet()) { - results.put(kvp.getKey(), convertMediaEntitySize(kvp.getValue())); - } - - return results; - } - - public static void convert(User user, Struct struct) { - struct - .put("Id", user.getId()) - .put("Name", user.getName()) - .put("ScreenName", user.getScreenName()) - .put("Location", user.getLocation()) - .put("Description", user.getDescription()) - .put("ContributorsEnabled", user.isContributorsEnabled()) - .put("ProfileImageURL", user.getProfileImageURL()) - .put("BiggerProfileImageURL", user.getBiggerProfileImageURL()) 
- .put("MiniProfileImageURL", user.getMiniProfileImageURL()) - .put("OriginalProfileImageURL", user.getOriginalProfileImageURL()) - .put("ProfileImageURLHttps", user.getProfileImageURLHttps()) - .put("BiggerProfileImageURLHttps", user.getBiggerProfileImageURLHttps()) - .put("MiniProfileImageURLHttps", user.getMiniProfileImageURLHttps()) - .put("OriginalProfileImageURLHttps", user.getOriginalProfileImageURLHttps()) - .put("DefaultProfileImage", user.isDefaultProfileImage()) - .put("URL", user.getURL()) - .put("Protected", user.isProtected()) - .put("FollowersCount", user.getFollowersCount()) - .put("ProfileBackgroundColor", user.getProfileBackgroundColor()) - .put("ProfileTextColor", user.getProfileTextColor()) - .put("ProfileLinkColor", user.getProfileLinkColor()) - .put("ProfileSidebarFillColor", user.getProfileSidebarFillColor()) - .put("ProfileSidebarBorderColor", user.getProfileSidebarBorderColor()) - .put("ProfileUseBackgroundImage", user.isProfileUseBackgroundImage()) - .put("DefaultProfile", user.isDefaultProfile()) - .put("ShowAllInlineMedia", user.isShowAllInlineMedia()) - .put("FriendsCount", user.getFriendsCount()) - .put("CreatedAt", user.getCreatedAt()) - .put("FavouritesCount", user.getFavouritesCount()) - .put("UtcOffset", user.getUtcOffset()) - .put("TimeZone", user.getTimeZone()) - .put("ProfileBackgroundImageURL", user.getProfileBackgroundImageURL()) - .put("ProfileBackgroundImageUrlHttps", user.getProfileBackgroundImageUrlHttps()) - .put("ProfileBannerURL", user.getProfileBannerURL()) - .put("ProfileBannerRetinaURL", user.getProfileBannerRetinaURL()) - .put("ProfileBannerIPadURL", user.getProfileBannerIPadURL()) - .put("ProfileBannerIPadRetinaURL", user.getProfileBannerIPadRetinaURL()) - .put("ProfileBannerMobileURL", user.getProfileBannerMobileURL()) - .put("ProfileBannerMobileRetinaURL", user.getProfileBannerMobileRetinaURL()) - .put("ProfileBackgroundTiled", user.isProfileBackgroundTiled()) - .put("Lang", user.getLang()) - 
.put("StatusesCount", user.getStatusesCount()) - .put("GeoEnabled", user.isGeoEnabled()) - .put("Verified", user.isVerified()) - .put("Translator", user.isTranslator()) - .put("ListedCount", user.getListedCount()) - .put("FollowRequestSent", user.isFollowRequestSent()); - - List withheldInCountries = new ArrayList<>(); - if (null != user.getWithheldInCountries()) { - for (String s : user.getWithheldInCountries()) { - withheldInCountries.add(s); - } - } - struct.put("WithheldInCountries", withheldInCountries); - - } - - public static void convert(Place place, Struct struct) { - if (null == place) { - return; - } - struct.put("Name", place.getName()) - .put("StreetAddress", place.getStreetAddress()) - .put("CountryCode", place.getCountryCode()) - .put("Id", place.getId()) - .put("Country", place.getCountry()) - .put("PlaceType", place.getPlaceType()) - .put("URL", place.getURL()) - .put("FullName", place.getFullName()); - } - - public static void convert(GeoLocation geoLocation, Struct struct) { - if (null == geoLocation) { - return; - } - struct.put("Latitude", geoLocation.getLatitude()) - .put("Longitude", geoLocation.getLongitude()); - } - - - static Struct convertMediaEntityVariant(MediaEntity.Variant variant) { - return new Struct(SCHEMA_MEDIA_ENTITY_VARIANT) - .put("Url", variant.getUrl()) - .put("Bitrate", variant.getBitrate()) - .put("ContentType", variant.getContentType()); - } - - public static List convert(MediaEntity.Variant[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (MediaEntity.Variant item : items) { - Struct struct = convertMediaEntityVariant(item); - result.add(struct); - } - return result; - } - - - static Struct convertMediaEntitySize(MediaEntity.Size size) { - return new Struct(SCHEMA_MEDIA_ENTITY_SIZE) - .put("Resize", size.getResize()) - .put("Width", size.getWidth()) - .put("Height", size.getHeight()); - } - - public static List convert(MediaEntity.Size[] items) { - List result = new 
ArrayList<>(); - if (null == items) { - return result; - } - for (MediaEntity.Size item : items) { - Struct struct = convertMediaEntitySize(item); - result.add(struct); - } - return result; - } - - - static Struct convertExtendedMediaEntity(ExtendedMediaEntity extendedMediaEntity) { - return new Struct(SCHEMA_EXTENDED_MEDIA_ENTITY) - .put("VideoAspectRatioWidth", extendedMediaEntity.getVideoAspectRatioWidth()) - .put("VideoAspectRatioHeight", extendedMediaEntity.getVideoAspectRatioHeight()) - .put("VideoDurationMillis", extendedMediaEntity.getVideoDurationMillis()) - .put("VideoVariants", extendedMediaEntity.getVideoVariants()) - .put("ExtAltText", extendedMediaEntity.getExtAltText()) - .put("Id", extendedMediaEntity.getId()) - .put("Type", extendedMediaEntity.getType()) - .put("MediaURL", extendedMediaEntity.getMediaURL()) - .put("Sizes", extendedMediaEntity.getSizes()) - .put("MediaURLHttps", extendedMediaEntity.getMediaURLHttps()) - .put("URL", extendedMediaEntity.getURL()) - .put("Text", extendedMediaEntity.getText()) - .put("ExpandedURL", extendedMediaEntity.getExpandedURL()) - .put("Start", extendedMediaEntity.getStart()) - .put("End", extendedMediaEntity.getEnd()) - .put("DisplayURL", extendedMediaEntity.getDisplayURL()); - } - - public static List convert(ExtendedMediaEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (ExtendedMediaEntity item : items) { - Struct struct = convertExtendedMediaEntity(item); - result.add(struct); - } - return result; - } - - - static Struct convertHashtagEntity(HashtagEntity hashtagEntity) { - return new Struct(SCHEMA_HASHTAG_ENTITY) - .put("Text", hashtagEntity.getText()) - .put("Start", hashtagEntity.getStart()) - .put("End", hashtagEntity.getEnd()); - } - - public static List convert(HashtagEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (HashtagEntity item : items) { - Struct struct = convertHashtagEntity(item); - 
result.add(struct); - } - return result; - } - - - static Struct convertMediaEntity(MediaEntity mediaEntity) { - return new Struct(SCHEMA_MEDIA_ENTITY) - .put("Id", mediaEntity.getId()) - .put("Type", mediaEntity.getType()) - .put("MediaURL", mediaEntity.getMediaURL()) - .put("Sizes", convertSizes(mediaEntity.getSizes())) - .put("MediaURLHttps", mediaEntity.getMediaURLHttps()) - .put("VideoAspectRatioWidth", mediaEntity.getVideoAspectRatioWidth()) - .put("VideoAspectRatioHeight", mediaEntity.getVideoAspectRatioHeight()) - .put("VideoDurationMillis", mediaEntity.getVideoDurationMillis()) - .put("VideoVariants", convert(mediaEntity.getVideoVariants())) - .put("ExtAltText", mediaEntity.getExtAltText()) - .put("URL", mediaEntity.getURL()) - .put("Text", mediaEntity.getText()) - .put("ExpandedURL", mediaEntity.getExpandedURL()) - .put("Start", mediaEntity.getStart()) - .put("End", mediaEntity.getEnd()) - .put("DisplayURL", mediaEntity.getDisplayURL()); - } - - public static List convert(MediaEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (MediaEntity item : items) { - Struct struct = convertMediaEntity(item); - result.add(struct); - } - return result; - } - - - static Struct convertSymbolEntity(SymbolEntity symbolEntity) { - return new Struct(SCHEMA_SYMBOL_ENTITY) - .put("Start", symbolEntity.getStart()) - .put("End", symbolEntity.getEnd()) - .put("Text", symbolEntity.getText()); - } - - public static List convert(SymbolEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (SymbolEntity item : items) { - Struct struct = convertSymbolEntity(item); - result.add(struct); - } - return result; - } - - - static Struct convertURLEntity(URLEntity uRLEntity) { - return new Struct(SCHEMA_URL_ENTITY) - .put("URL", uRLEntity.getURL()) - .put("Text", uRLEntity.getText()) - .put("ExpandedURL", uRLEntity.getExpandedURL()) - .put("Start", uRLEntity.getStart()) - .put("End", 
uRLEntity.getEnd()) - .put("DisplayURL", uRLEntity.getDisplayURL()); - } - - public static List convert(URLEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (URLEntity item : items) { - Struct struct = convertURLEntity(item); - result.add(struct); - } - return result; - } - - - static Struct convertUserMentionEntity(UserMentionEntity userMentionEntity) { - return new Struct(SCHEMA_USER_MENTION_ENTITY) - .put("Name", userMentionEntity.getName()) - .put("Id", userMentionEntity.getId()) - .put("Text", userMentionEntity.getText()) - .put("ScreenName", userMentionEntity.getScreenName()) - .put("Start", userMentionEntity.getStart()) - .put("End", userMentionEntity.getEnd()); - } - - public static List convert(UserMentionEntity[] items) { - List result = new ArrayList<>(); - if (null == items) { - return result; - } - for (UserMentionEntity item : items) { - Struct struct = convertUserMentionEntity(item); - result.add(struct); - } - return result; - } - - - public static void convertKey(Status status, Struct struct) { - struct.put("Id", status.getId()); - } - - public static void convert(Status status, Struct struct) { - struct - .put("CreatedAt", status.getCreatedAt()) - .put("Id", status.getId()) - .put("Text", status.getText()) - .put("Source", status.getSource()) - .put("Truncated", status.isTruncated()) - .put("InReplyToStatusId", status.getInReplyToStatusId()) - .put("InReplyToUserId", status.getInReplyToUserId()) - .put("InReplyToScreenName", status.getInReplyToScreenName()) - .put("Favorited", status.isFavorited()) - .put("Retweeted", status.isRetweeted()) - .put("FavoriteCount", status.getFavoriteCount()) - .put("Retweet", status.isRetweet()) - .put("RetweetCount", status.getRetweetCount()) - .put("RetweetedByMe", status.isRetweetedByMe()) - .put("CurrentUserRetweetId", status.getCurrentUserRetweetId()) - .put("PossiblySensitive", status.isPossiblySensitive()) - .put("Lang", status.getLang()); - - Struct 
userStruct; - if (null != status.getUser()) { - userStruct = new Struct(USER_SCHEMA); - convert(status.getUser(), userStruct); - } else { - userStruct = null; - } - struct.put("User", userStruct); - - Struct placeStruct; - if (null != status.getPlace()) { - placeStruct = new Struct(PLACE_SCHEMA); - convert(status.getPlace(), placeStruct); - } else { - placeStruct = null; - } - struct.put("Place", placeStruct); - - Struct geoLocationStruct; - if (null != status.getGeoLocation()) { - geoLocationStruct = new Struct(GEO_LOCATION_SCHEMA); - convert(status.getGeoLocation(), geoLocationStruct); - } else { - geoLocationStruct = null; - } - struct.put("GeoLocation", geoLocationStruct); - List contributers = new ArrayList<>(); - - if (null != status.getContributors()) { - for (Long l : status.getContributors()) { - contributers.add(l); - } - } - struct.put("Contributors", contributers); - - List withheldInCountries = new ArrayList<>(); - if (null != status.getWithheldInCountries()) { - for (String s : status.getWithheldInCountries()) { - withheldInCountries.add(s); - } - } - struct.put("WithheldInCountries", withheldInCountries); - - struct.put("HashtagEntities", convert(status.getHashtagEntities())); - struct.put("UserMentionEntities", convert(status.getUserMentionEntities())); - struct.put("MediaEntities", convert(status.getMediaEntities())); - struct.put("SymbolEntities", convert(status.getSymbolEntities())); - struct.put("URLEntities", convert(status.getURLEntities())); - } - - public static void convert(StatusDeletionNotice statusDeletionNotice, Struct struct) { - struct.put("StatusId", statusDeletionNotice.getStatusId()); - struct.put("UserId", statusDeletionNotice.getUserId()); - } - - public static void convertKey(StatusDeletionNotice statusDeletionNotice, Struct struct) { - struct.put("StatusId", statusDeletionNotice.getStatusId()); - } -} diff --git a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverter.java 
b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverter.java new file mode 100644 index 0000000..152ab84 --- /dev/null +++ b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverter.java @@ -0,0 +1,913 @@ +/** + * Copyright © 2022 Arek Burdach (arek.burdach@gmail.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.github.jcustenborder.kafka.connect.twitter; + +import com.twitter.clientlib.model.CashtagEntity; +import com.twitter.clientlib.model.ContextAnnotation; +import com.twitter.clientlib.model.ContextAnnotationDomainFields; +import com.twitter.clientlib.model.ContextAnnotationEntityFields; +import com.twitter.clientlib.model.FullTextEntities; +import com.twitter.clientlib.model.HashtagEntity; +import com.twitter.clientlib.model.MentionEntity; +import com.twitter.clientlib.model.Point; +import com.twitter.clientlib.model.ReplySettings; +import com.twitter.clientlib.model.Tweet; +import com.twitter.clientlib.model.TweetAttachments; +import com.twitter.clientlib.model.TweetEditControls; +import com.twitter.clientlib.model.TweetGeo; +import com.twitter.clientlib.model.TweetNonPublicMetrics; +import com.twitter.clientlib.model.TweetOrganicMetrics; +import com.twitter.clientlib.model.TweetPromotedMetrics; +import com.twitter.clientlib.model.TweetPublicMetrics; +import com.twitter.clientlib.model.TweetReferencedTweets; +import com.twitter.clientlib.model.TweetWithheld; +import 
com.twitter.clientlib.model.UrlEntity; +import com.twitter.clientlib.model.UrlImage; +import org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.data.Timestamp; + +import javax.annotation.Nonnull; +import java.math.RoundingMode; +import java.net.URL; +import java.util.ArrayList; +import java.util.Date; +import java.util.Optional; +import java.util.stream.Collectors; + +public class TweetConverter { + + /*{ + "type" : "object", + "required" : [ "type", "id" ], + "properties" : { + "type" : { + "type" : "string", + "enum" : [ "retweeted", "quoted", "replied_to" ] + }, + "id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + } + } + }*/ + public static final Schema TWEET_REFERENCED_TWEETS_ITEM_SCHEMA = SchemaBuilder.struct() + .field(TweetReferencedTweets.SERIALIZED_NAME_TYPE, Schema.STRING_SCHEMA) + .field(TweetReferencedTweets.SERIALIZED_NAME_ID, Schema.STRING_SCHEMA) + .build(); + + + public static Struct convert(@Nonnull TweetReferencedTweets input) { + return new Struct(TWEET_REFERENCED_TWEETS_ITEM_SCHEMA) + .put(TweetReferencedTweets.SERIALIZED_NAME_TYPE, input.getType().getValue()) + .put(TweetReferencedTweets.SERIALIZED_NAME_ID, input.getId()); + } + + /*{ + "type" : "object", + "properties" : { + "media_keys" : { + "type" : "array", + "items" : { + "type" : "string", + "pattern" : "^([0-9]+)_([0-9]+)$" + }, + "minItems" : 1 + }, + "poll_ids" : { + "type" : "array", + "items" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "minItems" : 1 + } + } + }*/ + public static final Schema TWEET_ATTACHMENTS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetAttachments.SERIALIZED_NAME_MEDIA_KEYS, SchemaBuilder.array(Schema.STRING_SCHEMA).optional()) + .field(TweetAttachments.SERIALIZED_NAME_POLL_IDS, SchemaBuilder.array(Schema.STRING_SCHEMA).optional()) + .build(); + + public 
static Struct convert(@Nonnull TweetAttachments input) { + return new Struct(TWEET_ATTACHMENTS_SCHEMA) + .put(TweetAttachments.SERIALIZED_NAME_MEDIA_KEYS, input.getMediaKeys()) + .put(TweetAttachments.SERIALIZED_NAME_POLL_IDS, input.getPollIds()); + } + + /*{ + "type" : "object", + "required" : [ "id" ], + "properties" : { + "id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "name" : { + "type" : "string" + }, + "description" : { + "type" : "string" + } + } + }*/ + public static final Schema CONTEXT_ANNOTATION_DOMAIN_FIELDS_SCHEMA = SchemaBuilder.struct() + .field(ContextAnnotationDomainFields.SERIALIZED_NAME_ID, Schema.STRING_SCHEMA) + .field(ContextAnnotationDomainFields.SERIALIZED_NAME_NAME, Schema.OPTIONAL_STRING_SCHEMA) + .field(ContextAnnotationDomainFields.SERIALIZED_NAME_DESCRIPTION, Schema.OPTIONAL_STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull ContextAnnotationDomainFields input) { + return new Struct(CONTEXT_ANNOTATION_DOMAIN_FIELDS_SCHEMA) + .put(ContextAnnotationDomainFields.SERIALIZED_NAME_ID, input.getId()) + .put(ContextAnnotationDomainFields.SERIALIZED_NAME_NAME, input.getName()) + .put(ContextAnnotationDomainFields.SERIALIZED_NAME_DESCRIPTION, input.getDescription()); + } + + /*{ + "type" : "object", + "required" : [ "id" ], + "properties" : { + "id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "name" : { + "type" : "string" + }, + "description" : { + "type" : "string" + } + } + }*/ + public static final Schema CONTEXT_ANNOTATION_DOMAIN_ENTITY_SCHEMA = SchemaBuilder.struct() + .field(ContextAnnotationEntityFields.SERIALIZED_NAME_ID, Schema.STRING_SCHEMA) + .field(ContextAnnotationEntityFields.SERIALIZED_NAME_NAME, Schema.OPTIONAL_STRING_SCHEMA) + .field(ContextAnnotationEntityFields.SERIALIZED_NAME_DESCRIPTION, Schema.OPTIONAL_STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull ContextAnnotationEntityFields input) { + return new Struct(CONTEXT_ANNOTATION_DOMAIN_ENTITY_SCHEMA) 
+ .put(ContextAnnotationEntityFields.SERIALIZED_NAME_ID, input.getId()) + .put(ContextAnnotationEntityFields.SERIALIZED_NAME_NAME, input.getName()) + .put(ContextAnnotationEntityFields.SERIALIZED_NAME_DESCRIPTION, input.getDescription()); + } + + /*{ + "type" : "object", + "required" : [ "domain", "entity" ], + "properties" : { + "domain" : { + "$ref" : "#/components/schemas/ContextAnnotationDomainFields" + }, + "entity" : { + "$ref" : "#/components/schemas/ContextAnnotationEntityFields" + } + } + }*/ + public static final Schema CONTEXT_ANNOTATION_SCHEMA = SchemaBuilder.struct() + .optional() + .field(ContextAnnotation.SERIALIZED_NAME_DOMAIN, CONTEXT_ANNOTATION_DOMAIN_FIELDS_SCHEMA) + .field(ContextAnnotation.SERIALIZED_NAME_ENTITY, CONTEXT_ANNOTATION_DOMAIN_ENTITY_SCHEMA) + .build(); + + public static Struct convert(@Nonnull ContextAnnotation input) { + return new Struct(CONTEXT_ANNOTATION_SCHEMA) + .put(ContextAnnotation.SERIALIZED_NAME_DOMAIN, convert(input.getDomain())) + .put(ContextAnnotation.SERIALIZED_NAME_ENTITY, convert(input.getEntity())); + } + + /*{ + "type" : "object", + "required" : [ "copyright", "country_codes" ], + "properties" : { + "copyright" : { + "type" : "boolean" + }, + "country_codes" : { + "type" : "array", + "items" : { + "type" : "string", + "pattern" : "^[A-Z]{2}$" + }, + "uniqueItems" : true, + "minItems" : 1 + }, + "scope" : { + "type" : "string", + "enum" : [ "tweet", "user" ] + } + } + }*/ + public static final Schema TWEET_WITHHELD_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetWithheld.SERIALIZED_NAME_COPYRIGHT, Schema.BOOLEAN_SCHEMA) + .field(TweetWithheld.SERIALIZED_NAME_COUNTRY_CODES, SchemaBuilder.array(Schema.STRING_SCHEMA)) + .field(TweetWithheld.SERIALIZED_NAME_SCOPE, Schema.OPTIONAL_STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetWithheld input) { + return new Struct(TWEET_WITHHELD_SCHEMA) + .put(TweetWithheld.SERIALIZED_NAME_COPYRIGHT, input.getCopyright()) + 
.put(TweetWithheld.SERIALIZED_NAME_COUNTRY_CODES, new ArrayList<>(input.getCountryCodes())) + .put(TweetWithheld.SERIALIZED_NAME_SCOPE, + Optional.ofNullable(input.getScope()) + .map(TweetWithheld.ScopeEnum::getValue) + .orElse(null)); + } + + private static final int POINTS_COORDINATES_SCALE = 8; + + /*{ + "type" : "object", + "required" : [ "type", "coordinates" ], + "properties" : { + "type" : { + "type" : "string", + "enum" : [ "Point" ] + }, + "coordinates" : { + "type" : "array", + "items" : { + "type" : "number" + }, + "minItems" : 2, + "maxItems" : 2 + } + } + }*/ + public static final Schema POINT_SCHEMA = SchemaBuilder.struct() + .optional() + .field(Point.SERIALIZED_NAME_TYPE, Schema.STRING_SCHEMA) + .field(Point.SERIALIZED_NAME_COORDINATES, SchemaBuilder.array(Decimal.schema(POINTS_COORDINATES_SCALE))) + .build(); + + public static Struct convert(@Nonnull Point input) { + return new Struct(POINT_SCHEMA) + .put(Point.SERIALIZED_NAME_TYPE, input.getType().getValue()) + .put(Point.SERIALIZED_NAME_COORDINATES, input.getCoordinates().stream() + .map(d -> d.setScale(POINTS_COORDINATES_SCALE, RoundingMode.UNNECESSARY)) + .collect(Collectors.toList())); + } + + /*{ + "type" : "object", + "properties" : { + "coordinates" : { + "$ref" : "#/components/schemas/Point" + }, + "place_id" : { + "type" : "string" + } + } + }*/ + public static final Schema TWEET_GEO_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetGeo.SERIALIZED_NAME_COORDINATES, POINT_SCHEMA) + .field(TweetGeo.SERIALIZED_NAME_PLACE_ID, Schema.OPTIONAL_STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetGeo input) { + return new Struct(TWEET_GEO_SCHEMA) + .put(TweetGeo.SERIALIZED_NAME_COORDINATES, + Optional.ofNullable(input.getCoordinates()) + .map(TweetConverter::convert) + .orElse(null)) + .put(TweetGeo.SERIALIZED_NAME_PLACE_ID, input.getPlaceId()); + } + + /*{ + "type" : "object", + "properties" : { + "url" : { + "type" : "string", + "format" : "uri" + }, + "height" : 
{ + "type" : "integer", + "minimum" : 0 + }, + "width" : { + "type" : "integer", + "minimum" : 0 + } + } + }*/ + public static final Schema URL_IMAGE_SCHEMA = SchemaBuilder.struct() + .field(UrlImage.SERIALIZED_NAME_URL, Schema.STRING_SCHEMA) + .field(UrlImage.SERIALIZED_NAME_HEIGHT, Schema.INT32_SCHEMA) + .field(UrlImage.SERIALIZED_NAME_WIDTH, Schema.INT32_SCHEMA) + .build(); + + public static Struct convert(@Nonnull UrlImage input) { + return new Struct(URL_IMAGE_SCHEMA) + .put(UrlImage.SERIALIZED_NAME_URL, + Optional.ofNullable(input.getUrl()) + .map(URL::toString) + .orElse(null)) + .put(UrlImage.SERIALIZED_NAME_HEIGHT, input.getHeight()) + .put(UrlImage.SERIALIZED_NAME_WIDTH, input.getWidth()); + } + + /*{ + "type" : "object", + "required" : [ "start", "end", "url" ], + "properties" : { + "start" : { + "type" : "integer", + "minimum" : 0 + }, + "end" : { + "type" : "integer", + "minimum" : 0 + }, + "url" : { + "type" : "string", + "format" : "uri" + }, + "expanded_url" : { + "type" : "string", + "format" : "uri" + }, + "display_url" : { + "type" : "string" + }, + "unwound_url" : { + "type" : "string", + "format" : "uri" + }, + "status" : { + "type" : "integer", + "minimum" : 100, + "maximum" : 599 + }, + "title" : { + "type" : "string" + }, + "description" : { + "type" : "string" + }, + "images" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/URLImage" + }, + "minItems" : 1 + } + } + }*/ + public static final Schema URL_ENTITY_SCHEMA = SchemaBuilder.struct() + .field(UrlEntity.SERIALIZED_NAME_START, Schema.INT32_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_END, Schema.INT32_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_URL, Schema.STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_EXPANDED_URL, Schema.OPTIONAL_STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_DISPLAY_URL, Schema.OPTIONAL_STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_UNWOUND_URL, Schema.OPTIONAL_STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_STATUS, 
Schema.OPTIONAL_INT32_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_TITLE, Schema.OPTIONAL_STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_DESCRIPTION, Schema.OPTIONAL_STRING_SCHEMA) + .field(UrlEntity.SERIALIZED_NAME_IMAGES, SchemaBuilder.array(URL_IMAGE_SCHEMA).optional().build()) + .build(); + + public static Struct convert(@Nonnull UrlEntity input) { + return new Struct(URL_ENTITY_SCHEMA) + .put(UrlEntity.SERIALIZED_NAME_START, input.getStart()) + .put(UrlEntity.SERIALIZED_NAME_END, input.getEnd()) + .put(UrlEntity.SERIALIZED_NAME_URL, input.getUrl().toString()) + .put(UrlEntity.SERIALIZED_NAME_EXPANDED_URL, + Optional.ofNullable(input.getExpandedUrl()) + .map(URL::toString) + .orElse(null)) + .put(UrlEntity.SERIALIZED_NAME_DISPLAY_URL, input.getDisplayUrl()) + .put(UrlEntity.SERIALIZED_NAME_UNWOUND_URL, + Optional.ofNullable(input.getUnwoundUrl()) + .map(URL::toString) + .orElse(null)) + .put(UrlEntity.SERIALIZED_NAME_STATUS, input.getStatus()) + .put(UrlEntity.SERIALIZED_NAME_TITLE, input.getTitle()) + .put(UrlEntity.SERIALIZED_NAME_DESCRIPTION, input.getDescription()) + .put(UrlEntity.SERIALIZED_NAME_IMAGES, + Optional.ofNullable(input.getImages()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)); + } + + /*{ + "type" : "object", + "required" : [ "start", "end", "tag" ], + "properties" : { + "start" : { + "type" : "integer", + "minimum" : 0 + }, + "end" : { + "type" : "integer", + "minimum" : 0 + }, + "tag" : { + "type" : "string" + } + } + }*/ + public static final Schema HASHTAG_ENTITY_SCHEMA = SchemaBuilder.struct() + .field(HashtagEntity.SERIALIZED_NAME_START, Schema.INT32_SCHEMA) + .field(HashtagEntity.SERIALIZED_NAME_END, Schema.INT32_SCHEMA) + .field(HashtagEntity.SERIALIZED_NAME_TAG, Schema.STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull HashtagEntity input) { + return new Struct(HASHTAG_ENTITY_SCHEMA) + .put(HashtagEntity.SERIALIZED_NAME_START, input.getStart()) + 
.put(HashtagEntity.SERIALIZED_NAME_END, input.getEnd()) + .put(HashtagEntity.SERIALIZED_NAME_TAG, input.getTag()); + } + + /*{ + "type" : "object", + "required" : [ "start", "end", "username", "id" ], + "properties" : { + "start" : { + "type" : "integer", + "minimum" : 0 + }, + "end" : { + "type" : "integer", + "minimum" : 0 + }, + "username" : { + "type" : "string", + "pattern" : "^[A-Za-z0-9_]{1,15}$" + }, + "id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + } + } + }*/ + public static final Schema MENTION_ENTITY_SCHEMA = SchemaBuilder.struct() + .field(MentionEntity.SERIALIZED_NAME_START, Schema.INT32_SCHEMA) + .field(MentionEntity.SERIALIZED_NAME_END, Schema.INT32_SCHEMA) + .field(MentionEntity.SERIALIZED_NAME_USERNAME, Schema.STRING_SCHEMA) + .field(MentionEntity.SERIALIZED_NAME_ID, Schema.STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull MentionEntity input) { + return new Struct(MENTION_ENTITY_SCHEMA) + .put(MentionEntity.SERIALIZED_NAME_START, input.getStart()) + .put(MentionEntity.SERIALIZED_NAME_END, input.getEnd()) + .put(MentionEntity.SERIALIZED_NAME_USERNAME, input.getUsername()) + .put(MentionEntity.SERIALIZED_NAME_ID, input.getId()); + } + + /*{ + "type" : "object", + "required" : [ "start", "end", "tag" ], + "properties" : { + "start" : { + "type" : "integer", + "minimum" : 0 + }, + "end" : { + "type" : "integer", + "minimum" : 0 + }, + "tag" : { + "type" : "string" + } + } + }*/ + public static final Schema CASHTAG_ENTITY_SCHEMA = SchemaBuilder.struct() + .field(CashtagEntity.SERIALIZED_NAME_START, Schema.INT32_SCHEMA) + .field(CashtagEntity.SERIALIZED_NAME_END, Schema.INT32_SCHEMA) + .field(CashtagEntity.SERIALIZED_NAME_TAG, Schema.STRING_SCHEMA) + .build(); + + public static Struct convert(@Nonnull CashtagEntity input) { + return new Struct(CASHTAG_ENTITY_SCHEMA) + .put(CashtagEntity.SERIALIZED_NAME_START, input.getStart()) + .put(CashtagEntity.SERIALIZED_NAME_END, input.getEnd()) + 
.put(CashtagEntity.SERIALIZED_NAME_TAG, input.getTag()); + } + + /*{ + "type" : "object", + "properties" : { + "urls" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/UrlEntity" + }, + "minItems" : 1 + }, + "hashtags" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/HashtagEntity" + }, + "minItems" : 1 + }, + "mentions" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/MentionEntity" + }, + "minItems" : 1 + }, + "cashtags" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/CashtagEntity" + }, + "minItems" : 1 + } + } + }*/ + public static final Schema FULL_TEXT_ENTITIES_SCHEMA = SchemaBuilder.struct() + .optional() + .field(FullTextEntities.SERIALIZED_NAME_URLS, SchemaBuilder.array(URL_ENTITY_SCHEMA).optional().build()) + .field(FullTextEntities.SERIALIZED_NAME_HASHTAGS, SchemaBuilder.array(HASHTAG_ENTITY_SCHEMA).optional().build()) + .field(FullTextEntities.SERIALIZED_NAME_MENTIONS, SchemaBuilder.array(MENTION_ENTITY_SCHEMA).optional().build()) + .field(FullTextEntities.SERIALIZED_NAME_CASHTAGS, SchemaBuilder.array(CASHTAG_ENTITY_SCHEMA).optional().build()) + .build(); + + public static Struct convert(@Nonnull FullTextEntities input) { + return new Struct(FULL_TEXT_ENTITIES_SCHEMA) + .put(FullTextEntities.SERIALIZED_NAME_URLS, + Optional.ofNullable(input.getUrls()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)) + .put(FullTextEntities.SERIALIZED_NAME_HASHTAGS, + Optional.ofNullable(input.getHashtags()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)) + .put(FullTextEntities.SERIALIZED_NAME_MENTIONS, + Optional.ofNullable(input.getMentions()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)) + .put(FullTextEntities.SERIALIZED_NAME_CASHTAGS, + Optional.ofNullable(input.getCashtags()) + .map(list -> 
list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)); + } + + /*{ + "type" : "object", + "required" : [ "retweet_count", "reply_count", "like_count" ], + "properties" : { + "retweet_count" : { + "type" : "integer" + }, + "reply_count" : { + "type" : "integer" + }, + "like_count" : { + "type" : "integer" + }, + "quote_count" : { + "type" : "integer" + } + } + }*/ + public static final Schema TWEET_PUBLIC_METRICS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetPublicMetrics.SERIALIZED_NAME_RETWEET_COUNT, Schema.INT32_SCHEMA) + .field(TweetPublicMetrics.SERIALIZED_NAME_REPLY_COUNT, Schema.INT32_SCHEMA) + .field(TweetPublicMetrics.SERIALIZED_NAME_LIKE_COUNT, Schema.INT32_SCHEMA) + .field(TweetPublicMetrics.SERIALIZED_NAME_QUOTE_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetPublicMetrics input) { + return new Struct(TWEET_PUBLIC_METRICS_SCHEMA) + .put(TweetPublicMetrics.SERIALIZED_NAME_RETWEET_COUNT, input.getRetweetCount()) + .put(TweetPublicMetrics.SERIALIZED_NAME_REPLY_COUNT, input.getReplyCount()) + .put(TweetPublicMetrics.SERIALIZED_NAME_LIKE_COUNT, input.getLikeCount()) + .put(TweetPublicMetrics.SERIALIZED_NAME_QUOTE_COUNT, input.getQuoteCount()); + } + + /*{ + "type" : "object", + "properties" : { + "impression_count" : { + "type" : "integer", + "format" : "int32" + } + } + }*/ + public static final Schema TWEET_NON_PUBLIC_METRICS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetNonPublicMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetNonPublicMetrics input) { + return new Struct(TWEET_NON_PUBLIC_METRICS_SCHEMA) + .put(TweetNonPublicMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, input.getImpressionCount()); + } + + /*{ + "type" : "object", + "properties" : { + "impression_count" : { + "type" : "integer", + "format" : "int32" + }, + "like_count" : { + "type" : "integer", + 
"format" : "int32" + }, + "reply_count" : { + "type" : "integer", + "format" : "int32" + }, + "retweet_count" : { + "type" : "integer", + "format" : "int32" + } + } + }*/ + public static final Schema TWEET_PROMOTED_METRICS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetPromotedMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .field(TweetPromotedMetrics.SERIALIZED_NAME_LIKE_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .field(TweetPromotedMetrics.SERIALIZED_NAME_REPLY_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .field(TweetPromotedMetrics.SERIALIZED_NAME_RETWEET_COUNT, Schema.OPTIONAL_INT32_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetPromotedMetrics input) { + return new Struct(TWEET_PROMOTED_METRICS_SCHEMA) + .put(TweetPromotedMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, input.getImpressionCount()) + .put(TweetPromotedMetrics.SERIALIZED_NAME_LIKE_COUNT, input.getLikeCount()) + .put(TweetPromotedMetrics.SERIALIZED_NAME_REPLY_COUNT, input.getReplyCount()) + .put(TweetPromotedMetrics.SERIALIZED_NAME_RETWEET_COUNT, input.getRetweetCount()); + } + + /*{ + "type" : "object", + "required" : [ "impression_count", "retweet_count", "reply_count", "like_count", "user_profile_clicks" ], + "properties" : { + "impression_count" : { + "type" : "integer" + }, + "retweet_count" : { + "type" : "integer" + }, + "reply_count" : { + "type" : "integer" + }, + "like_count" : { + "type" : "integer" + } + } + }*/ + public static final Schema TWEET_ORGANIC_METRICS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetOrganicMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, Schema.INT32_SCHEMA) + .field(TweetOrganicMetrics.SERIALIZED_NAME_RETWEET_COUNT, Schema.INT32_SCHEMA) + .field(TweetOrganicMetrics.SERIALIZED_NAME_REPLY_COUNT, Schema.INT32_SCHEMA) + .field(TweetOrganicMetrics.SERIALIZED_NAME_LIKE_COUNT, Schema.INT32_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetOrganicMetrics input) { + return new 
Struct(TWEET_ORGANIC_METRICS_SCHEMA) + .put(TweetOrganicMetrics.SERIALIZED_NAME_IMPRESSION_COUNT, input.getImpressionCount()) + .put(TweetOrganicMetrics.SERIALIZED_NAME_RETWEET_COUNT, input.getRetweetCount()) + .put(TweetOrganicMetrics.SERIALIZED_NAME_REPLY_COUNT, input.getReplyCount()) + .put(TweetOrganicMetrics.SERIALIZED_NAME_LIKE_COUNT, input.getLikeCount()); + } + + /*{ + "type" : "object", + "required" : [ + "is_edit_eligible", + "editable_until", + "edits_remaining" + ], + "properties" : { + "editable_until" : { + "type" : "string", + "format" : "date-time" + }, + "edits_remaining" : { + "type" : "integer" + }, + "is_edit_eligible" : { + "type" : "boolean" + } + } + }*/ + public static final Schema TWEET_EDIT_CONTROLS_SCHEMA = SchemaBuilder.struct() + .optional() + .field(TweetEditControls.SERIALIZED_NAME_EDITABLE_UNTIL, Timestamp.SCHEMA) + .field(TweetEditControls.SERIALIZED_NAME_EDITS_REMAINING, Schema.INT32_SCHEMA) + .field(TweetEditControls.SERIALIZED_NAME_IS_EDIT_ELIGIBLE, Schema.BOOLEAN_SCHEMA) + .build(); + + public static Struct convert(@Nonnull TweetEditControls input) { + return new Struct(TWEET_EDIT_CONTROLS_SCHEMA) + .put(TweetEditControls.SERIALIZED_NAME_EDITABLE_UNTIL, Date.from(input.getEditableUntil().toInstant())) + .put(TweetEditControls.SERIALIZED_NAME_EDITS_REMAINING, input.getEditsRemaining()) + .put(TweetEditControls.SERIALIZED_NAME_IS_EDIT_ELIGIBLE, input.getIsEditEligible()); + } + + /*{ + "type" : "object", + "required" : [ "id", "text", "edit_history_tweet_ids" ], + "properties" : { + "id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "created_at" : { + "type" : "string", + "format" : "date-time" + }, + "text" : { + "type" : "string" + }, + "author_id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "in_reply_to_user_id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "referenced_tweets" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/TweetReferencedTweetsItem" + }, 
+ "minItems" : 1 + }, + "attachments" : { + "$ref" : "#/components/schemas/TweetAttachments" + }, + "context_annotations" : { + "type" : "array", + "items" : { + "$ref" : "#/components/schemas/ContextAnnotation" + }, + "minItems" : 1 + }, + "withheld" : { + "$ref" : "#/components/schemas/TweetWithheld" + }, + "geo" : { + "$ref" : "#/components/schemas/TweetGeo" + }, + "entities" : { + "$ref" : "#/components/schemas/FullTextEntities" + }, + "public_metrics" : { + "$ref" : "#/components/schemas/TweetPublicMetrics" + }, + "possibly_sensitive" : { + "type" : "boolean" + }, + "lang" : { + "type" : "string" + }, + "source" : { + "type" : "string" + }, + "non_public_metrics" : { + "$ref" : "#/components/schemas/TweetNonPublicMetrics" + }, + "promoted_metrics" : { + "$ref" : "#/components/schemas/TweetPromotedMetrics" + }, + "organic_metrics" : { + "$ref" : "#/components/schemas/TweetOrganicMetrics" + }, + "conversation_id" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + }, + "edit_controls" : { + "$ref" : "#/components/schemas/TweetEditControls" + }, + "edit_history_tweet_ids" : { + "type" : "array", + "minItems" : 1, + "items" : { + "type" : "string", + "pattern" : "^[0-9]{1,19}$" + } + }, + "reply_settings" : { + "type" : "string", + "pattern" : "^[A-Za-z]{1,12}$", + "enum" : [ + "everyone", + "mentionedUsers", + "following", + "other" + ] + } + } + }*/ + public static final Schema TWEET_SCHEMA = SchemaBuilder.struct() + .field(Tweet.SERIALIZED_NAME_ID, Schema.STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_CREATED_AT, Timestamp.builder().optional().build()) + .field(Tweet.SERIALIZED_NAME_TEXT, Schema.STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_AUTHOR_ID, Schema.OPTIONAL_STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_IN_REPLY_TO_USER_ID, Schema.OPTIONAL_STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_REFERENCED_TWEETS, SchemaBuilder.array(TWEET_REFERENCED_TWEETS_ITEM_SCHEMA).optional().build()) + .field(Tweet.SERIALIZED_NAME_ATTACHMENTS, TWEET_ATTACHMENTS_SCHEMA) 
+ .field(Tweet.SERIALIZED_NAME_CONTEXT_ANNOTATIONS, SchemaBuilder.array(CONTEXT_ANNOTATION_SCHEMA).optional().build()) + .field(Tweet.SERIALIZED_NAME_WITHHELD, TWEET_WITHHELD_SCHEMA) + .field(Tweet.SERIALIZED_NAME_GEO, TWEET_GEO_SCHEMA) + .field(Tweet.SERIALIZED_NAME_ENTITIES, FULL_TEXT_ENTITIES_SCHEMA) + .field(Tweet.SERIALIZED_NAME_PUBLIC_METRICS, TWEET_PUBLIC_METRICS_SCHEMA) + .field(Tweet.SERIALIZED_NAME_POSSIBLY_SENSITIVE, Schema.OPTIONAL_BOOLEAN_SCHEMA) + .field(Tweet.SERIALIZED_NAME_LANG, Schema.OPTIONAL_STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_SOURCE, Schema.OPTIONAL_STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_NON_PUBLIC_METRICS, TWEET_NON_PUBLIC_METRICS_SCHEMA) + .field(Tweet.SERIALIZED_NAME_PROMOTED_METRICS, TWEET_PROMOTED_METRICS_SCHEMA) + .field(Tweet.SERIALIZED_NAME_ORGANIC_METRICS, TWEET_ORGANIC_METRICS_SCHEMA) + .field(Tweet.SERIALIZED_NAME_CONVERSATION_ID, Schema.OPTIONAL_STRING_SCHEMA) + .field(Tweet.SERIALIZED_NAME_EDIT_CONTROLS, TWEET_EDIT_CONTROLS_SCHEMA) + .field(Tweet.SERIALIZED_NAME_EDIT_HISTORY_TWEET_IDS, SchemaBuilder.array(Schema.STRING_SCHEMA)) + .field(Tweet.SERIALIZED_NAME_REPLY_SETTINGS, Schema.OPTIONAL_STRING_SCHEMA); + + public static Struct convert(@Nonnull Tweet input) { + return new Struct(TWEET_SCHEMA) + .put(Tweet.SERIALIZED_NAME_ID, input.getId()) + .put(Tweet.SERIALIZED_NAME_CREATED_AT, + Optional.ofNullable(input.getCreatedAt()) + .map(offset -> Date.from(offset.toInstant())) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_TEXT, input.getText()) + .put(Tweet.SERIALIZED_NAME_AUTHOR_ID, input.getAuthorId()) + .put(Tweet.SERIALIZED_NAME_IN_REPLY_TO_USER_ID, input.getInReplyToUserId()) + .put(Tweet.SERIALIZED_NAME_REFERENCED_TWEETS, + Optional.ofNullable(input.getReferencedTweets()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_ATTACHMENTS, + Optional.ofNullable(input.getAttachments()) + .map(TweetConverter::convert) + .orElse(null)) + 
.put(Tweet.SERIALIZED_NAME_CONTEXT_ANNOTATIONS, + Optional.ofNullable(input.getContextAnnotations()) + .map(list -> list.stream().map(TweetConverter::convert).collect(Collectors.toList())) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_WITHHELD, + Optional.ofNullable(input.getWithheld()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_GEO, + Optional.ofNullable(input.getGeo()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_ENTITIES, + Optional.ofNullable(input.getEntities()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_PUBLIC_METRICS, + Optional.ofNullable(input.getPublicMetrics()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_POSSIBLY_SENSITIVE, input.getPossiblySensitive()) + .put(Tweet.SERIALIZED_NAME_LANG, input.getLang()) + .put(Tweet.SERIALIZED_NAME_SOURCE, input.getSource()) + .put(Tweet.SERIALIZED_NAME_NON_PUBLIC_METRICS, + Optional.ofNullable(input.getNonPublicMetrics()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_PROMOTED_METRICS, + Optional.ofNullable(input.getPromotedMetrics()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_ORGANIC_METRICS, + Optional.ofNullable(input.getOrganicMetrics()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_CONVERSATION_ID, input.getConversationId()) + .put(Tweet.SERIALIZED_NAME_EDIT_CONTROLS, + Optional.ofNullable(input.getEditControls()) + .map(TweetConverter::convert) + .orElse(null)) + .put(Tweet.SERIALIZED_NAME_EDIT_HISTORY_TWEET_IDS, input.getEditHistoryTweetIds()) + .put(Tweet.SERIALIZED_NAME_REPLY_SETTINGS, + Optional.ofNullable(input.getReplySettings()) + .map(ReplySettings::getValue) + .orElse(null)); + } + +} diff --git a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnector.java b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnector.java 
index 4ef3fc9..65ef24e 100644 --- a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnector.java +++ b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnector.java @@ -18,18 +18,12 @@ import com.github.jcustenborder.kafka.connect.utils.VersionUtil; import com.github.jcustenborder.kafka.connect.utils.config.Description; import com.github.jcustenborder.kafka.connect.utils.config.Title; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Multimap; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.connect.connector.Task; import org.apache.kafka.connect.source.SourceConnector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.ArrayList; -import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -37,9 +31,8 @@ @Title("Twitter") @Description("This Twitter Source connector is used to pull data from Twitter in realtime.") public class TwitterSourceConnector extends SourceConnector { - private static Logger log = LoggerFactory.getLogger(TwitterSourceConnector.class); + Map settings; - private TwitterSourceConnectorConfig config; @Override public String version() { @@ -48,7 +41,6 @@ public String version() { @Override public void start(Map map) { - this.config = new TwitterSourceConnectorConfig(map); this.settings = map; } @@ -60,25 +52,9 @@ public Class taskClass() { @Override public List> taskConfigs(int maxTasks) { Preconditions.checkState(maxTasks > 0, "MaxTasks must be greater than 0"); - final int tasks = Math.min(maxTasks, this.config.filterKeywords.size()); - - - Multimap taskToKeywords = ArrayListMultimap.create(); - int index = 0; - for (String keyword : this.config.filterKeywords) { - final int taskID = index % tasks; - taskToKeywords.put(taskID, keyword); - index++; - } - final List> 
taskConfigs = new ArrayList<>(tasks); - - for (Integer taskID : taskToKeywords.keySet()) { - Collection keywords = taskToKeywords.get(taskID); - Map taskSettings = new LinkedHashMap<>(this.settings); - taskSettings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, Joiner.on(',').join(keywords)); - taskConfigs.add(taskSettings); - } - + final List> taskConfigs = new ArrayList<>(1); + Map taskSettings = new LinkedHashMap<>(this.settings); + taskConfigs.add(taskSettings); return taskConfigs; } @@ -91,4 +67,5 @@ public void stop() { public ConfigDef config() { return TwitterSourceConnectorConfig.conf(); } + } diff --git a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorConfig.java b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorConfig.java index ca3bfab..64f5d0e 100644 --- a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorConfig.java +++ b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorConfig.java @@ -16,107 +16,55 @@ package com.github.jcustenborder.kafka.connect.twitter; import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder; -import com.github.jcustenborder.kafka.connect.utils.config.ConfigUtils; -import com.google.common.primitives.Longs; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigDef.Importance; import org.apache.kafka.common.config.ConfigDef.Type; -import org.apache.kafka.common.config.ConfigException; -import twitter4j.conf.Configuration; -import twitter4j.conf.PropertyConfiguration; +import org.apache.kafka.common.config.types.Password; -import java.util.Collections; -import java.util.List; import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.stream.Collectors; - public class TwitterSourceConnectorConfig extends AbstractConfig { - public static final 
String TWITTER_DEBUG_CONF = "twitter.debug"; - public static final String TWITTER_OAUTH_CONSUMER_KEY_CONF = "twitter.oauth.consumerKey"; - public static final String TWITTER_OAUTH_SECRET_KEY_CONF = "twitter.oauth.consumerSecret"; - public static final String TWITTER_OAUTH_ACCESS_TOKEN_CONF = "twitter.oauth.accessToken"; - public static final String TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF = "twitter.oauth.accessTokenSecret"; - public static final String FILTER_KEYWORDS_CONF = "filter.keywords"; - public static final String FILTER_USER_IDS_CONF = "filter.userIds"; - public static final String KAFKA_STATUS_TOPIC_CONF = "kafka.status.topic"; - public static final String KAFKA_STATUS_TOPIC_DOC = "Kafka topic to write the statuses to."; - public static final String PROCESS_DELETES_CONF = "process.deletes"; - public static final String PROCESS_DELETES_DOC = "Should this connector process deletes."; + public static final String TWITTER_BEARER_TOKEN_CONF = "twitter.bearerToken"; + public static final String FILTER_RULE_CONF = "filter.rule"; + public static final String TWEET_FIELDS_CONF = "tweet.fields"; + public static final String KAFKA_TWEETS_TOPIC_CONF = "kafka.tweets.topic"; + public static final String KAFKA_TWEETS_TOPIC_DOC = "Kafka topic to write the tweets to."; public static final String QUEUE_EMPTY_MS_CONF = "queue.empty.ms"; public static final String QUEUE_BATCH_SIZE_CONF = "queue.batch.size"; - private static final String TWITTER_DEBUG_DOC = "Flag to enable debug logging for the twitter api."; - private static final String TWITTER_OAUTH_CONSUMER_KEY_DOC = "OAuth consumer key"; - private static final String TWITTER_OAUTH_SECRET_KEY_DOC = "OAuth consumer secret"; - private static final String TWITTER_OAUTH_ACCESS_TOKEN_DOC = "OAuth access token"; - private static final String TWITTER_OAUTH_ACCESS_TOKEN_SECRET_DOC = "OAuth access token secret"; - private static final String FILTER_KEYWORDS_DOC = "Twitter keywords to filter for."; - private static final String 
FILTER_USER_IDS_DOC = "Twitter user IDs to follow."; + private static final String TWITTER_BEARER_TOKEN_DOC = "Bearer token"; + private static final String FILTER_RULE_DOC = "Twitter rule used in filtering."; + + private static final String TWEET_FIELDS_DOC = "Fields that will be returned for tweet."; public static final String QUEUE_EMPTY_MS_DOC = "The amount of time to wait if there are no records in the queue."; public static final String QUEUE_BATCH_SIZE_DOC = "The number of records to return in a single batch."; public final String topic; - public final boolean twitterDebug; - public final boolean processDeletes; - public final Set filterKeywords; - public final Set filterUserIds; + public final String filterRule; + + public final String tweetFields; public final int queueEmptyMs; public final int queueBatchSize; - + public final Password bearerToken; public TwitterSourceConnectorConfig(Map parsedConfig) { super(conf(), parsedConfig); - this.topic = this.getString(KAFKA_STATUS_TOPIC_CONF); - this.twitterDebug = this.getBoolean(TWITTER_DEBUG_CONF); - this.processDeletes = this.getBoolean(PROCESS_DELETES_CONF); - this.filterKeywords = ConfigUtils.getSet(this, FILTER_KEYWORDS_CONF); - this.filterUserIds = ConfigUtils.getSet(this, FILTER_USER_IDS_CONF) - .stream() - .map(Long::parseLong) - .collect(Collectors.toSet()); + this.topic = getString(KAFKA_TWEETS_TOPIC_CONF); + this.filterRule = getString(FILTER_RULE_CONF); + this.tweetFields = getString(TWEET_FIELDS_CONF); this.queueBatchSize = getInt(QUEUE_BATCH_SIZE_CONF); this.queueEmptyMs = getInt(QUEUE_EMPTY_MS_CONF); + this.bearerToken = getPassword(TWITTER_BEARER_TOKEN_CONF); } - static class UserIdValidator implements ConfigDef.Validator { - @Override - public void ensureValid(String key, Object o) { - if (o instanceof List) { - List userIds = (List) o; - for (String userId : userIds) { - if (null == Longs.tryParse(userId)) { - throw new ConfigException(key, userId, "Could not parse to long."); - } - } - } - } - 
} - - static final ConfigDef.Validator USERID_VALIDATOR = new UserIdValidator(); - public static ConfigDef conf() { return new ConfigDef() - .define(TWITTER_DEBUG_CONF, Type.BOOLEAN, false, Importance.LOW, TWITTER_DEBUG_DOC) - .define(TWITTER_OAUTH_CONSUMER_KEY_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_CONSUMER_KEY_DOC) - .define(TWITTER_OAUTH_SECRET_KEY_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_SECRET_KEY_DOC) - .define(TWITTER_OAUTH_ACCESS_TOKEN_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_ACCESS_TOKEN_DOC) - .define(TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_ACCESS_TOKEN_SECRET_DOC) - .define(FILTER_KEYWORDS_CONF, Type.LIST, Importance.HIGH, FILTER_KEYWORDS_DOC) - .define( - ConfigKeyBuilder.of(FILTER_USER_IDS_CONF, Type.LIST) - .importance(Importance.HIGH) - .documentation(FILTER_USER_IDS_DOC) - .defaultValue(Collections.emptyList()) - .validator(USERID_VALIDATOR) - .build() - ) - .define(KAFKA_STATUS_TOPIC_CONF, Type.STRING, Importance.HIGH, KAFKA_STATUS_TOPIC_DOC) - .define(PROCESS_DELETES_CONF, Type.BOOLEAN, Importance.HIGH, PROCESS_DELETES_DOC) + .define(TWITTER_BEARER_TOKEN_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_BEARER_TOKEN_DOC) + .define(FILTER_RULE_CONF, Type.STRING, null, Importance.HIGH, FILTER_RULE_DOC) + .define(TWEET_FIELDS_CONF, Type.STRING, null, Importance.HIGH, TWEET_FIELDS_DOC) + .define(KAFKA_TWEETS_TOPIC_CONF, Type.STRING, Importance.HIGH, KAFKA_TWEETS_TOPIC_DOC) .define( ConfigKeyBuilder.of(QUEUE_EMPTY_MS_CONF, Type.INT) .importance(Importance.LOW) @@ -135,14 +83,4 @@ public static ConfigDef conf() { ); } - - public Configuration configuration() { - Properties properties = new Properties(); - /* - Grab all of the key/values that have a key that starts with twitter. This will strip 'twitter.' from beginning of - each key. This aligns with what the twitter4j framework is expecting. 
- */ - properties.putAll(this.originalsWithPrefix("twitter.")); - return new PropertyConfiguration(properties); - } } diff --git a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTask.java b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTask.java index 779a2bc..9a8ddf3 100644 --- a/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTask.java +++ b/src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTask.java @@ -1,12 +1,12 @@ /** * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) - * + *

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + *

* http://www.apache.org/licenses/LICENSE-2.0 - * + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,144 +18,214 @@ import com.github.jcustenborder.kafka.connect.utils.VersionUtil; import com.github.jcustenborder.kafka.connect.utils.data.SourceRecordDeque; import com.github.jcustenborder.kafka.connect.utils.data.SourceRecordDequeBuilder; -import com.google.common.base.Joiner; import com.google.common.collect.ImmutableMap; +import com.twitter.clientlib.ApiException; +import com.twitter.clientlib.TwitterCredentialsBearer; +import com.twitter.clientlib.api.TweetsApi; +import com.twitter.clientlib.api.TwitterApi; +import com.twitter.clientlib.model.AddOrDeleteRulesRequest; +import com.twitter.clientlib.model.AddOrDeleteRulesResponse; +import com.twitter.clientlib.model.AddRulesRequest; +import com.twitter.clientlib.model.DeleteRulesRequest; +import com.twitter.clientlib.model.DeleteRulesRequestDelete; +import com.twitter.clientlib.model.FilteredStreamingTweetResponse; +import com.twitter.clientlib.model.Get2TweetsSampleStreamResponse; +import com.twitter.clientlib.model.Rule; +import com.twitter.clientlib.model.RuleNoId; +import com.twitter.clientlib.model.Tweet; +import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.RetriableException; import org.apache.kafka.connect.source.SourceRecord; import org.apache.kafka.connect.source.SourceTask; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import twitter4j.FilterQuery; -import twitter4j.StallWarning; -import twitter4j.Status; -import twitter4j.StatusDeletionNotice; -import twitter4j.StatusListener; -import twitter4j.TwitterStream; -import twitter4j.TwitterStreamFactory; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Arrays; 
import java.util.List; import java.util.Map; +import java.util.stream.Collectors; -public class TwitterSourceTask extends SourceTask implements StatusListener { - static final Logger log = LoggerFactory.getLogger(TwitterSourceTask.class); - SourceRecordDeque messageQueue; +public class TwitterSourceTask extends SourceTask { - TwitterStream twitterStream; - TwitterSourceConnectorConfig config; + private static final Logger log = LoggerFactory.getLogger(TwitterSourceTask.class); + private static final int RETRIES = 10; + private SourceRecordDeque messageQueue; + + private volatile boolean running; + + private TwitterSourceConnectorConfig config; @Override public String version() { - return VersionUtil.version(this.getClass()); + return VersionUtil.version(getClass()); } @Override public void start(Map map) { - this.config = new TwitterSourceConnectorConfig(map); - this.messageQueue = SourceRecordDequeBuilder.of() - .emptyWaitMs(this.config.queueEmptyMs) - .batchSize(this.config.queueBatchSize) + config = new TwitterSourceConnectorConfig(map); + messageQueue = SourceRecordDequeBuilder.of() + .emptyWaitMs(config.queueEmptyMs) + .batchSize(config.queueBatchSize) .build(); - TwitterStreamFactory twitterStreamFactory = new TwitterStreamFactory(this.config.configuration()); - this.twitterStream = twitterStreamFactory.getInstance(); - String[] keywords = this.config.filterKeywords.toArray(new String[0]); - if (log.isInfoEnabled()) { - log.info("Setting up filters. Keywords = {}", Joiner.on(", ").join(keywords)); + TwitterApi apiInstance = new TwitterApi(new TwitterCredentialsBearer(config.bearerToken.value())); + InputStream twitterStream; + try { + twitterStream = initTweetsStreamProcessing(apiInstance); + } catch (ApiException e) { + // Api exception can be temporary. We will try to retry it a few times. 
+ throw new RetriableException(e); } + running = true; + Thread readingThread = new TweetsStreamProcessingThread(apiInstance, twitterStream); + readingThread.start(); + } + + private class TweetsStreamProcessingThread extends Thread { + + private final TwitterApi apiInstance; + + private InputStream twitterStream; - FilterQuery filterQuery = new FilterQuery(); - filterQuery.track(keywords); - if (!this.config.filterUserIds.isEmpty()) { - long[] userIds = this.config.filterUserIds.stream().mapToLong(Long::valueOf).toArray(); - log.info("Setting up filters. userIds = {}", Joiner.on(", ").join(this.config.filterUserIds)); - filterQuery.follow(userIds); + public TweetsStreamProcessingThread(TwitterApi apiInstance, InputStream initialTwitterStream) { + this.apiInstance = apiInstance; + this.twitterStream = initialTwitterStream; } - if (log.isInfoEnabled()) { - log.info("Starting the twitter stream."); + @Override + public void run() { + while (running) { + try { + BufferedReader reader = new BufferedReader(new InputStreamReader(twitterStream)); + String line = reader.readLine(); + while (running && line != null) { + if (config.filterRule != null) { + processFilteredStreamingTweetResponse(line); + } else { + processGet2TweetsSampleStreamResponse(line); + } + line = reader.readLine(); + } + closeTwitterStreamGracefully(); + } catch (Exception ex) { + log.error("Exception during tweets stream processing. Restarting stream processing...", ex); + try { + closeTwitterStreamGracefully(); + Thread.sleep(1000); + twitterStream = initTweetsStreamProcessing(apiInstance); + } catch (Exception exx) { + log.error("Exception during restart of stream processing. 
Stopping job..."); + throw new RuntimeException(exx); + } + } + } } - twitterStream.addListener(this); - twitterStream.filter(filterQuery); - } - @Override - public List poll() throws InterruptedException { - return this.messageQueue.getBatch(); + private void closeTwitterStreamGracefully() { + try { + twitterStream.close(); + } catch (IOException ex) { + log.error("Exception during tweets stream closing", ex); + } + } } - @Override - public void stop() { - if (log.isInfoEnabled()) { - log.info("Shutting down twitter stream."); + private InputStream initTweetsStreamProcessing(TwitterApi apiInstance) throws ApiException { + InputStream twitterStream; + if (config.filterRule != null) { + log.info("Setting up filter rule = {}", config.filterRule); + setFilterRule(apiInstance); + log.info("Starting tweets search stream."); + TweetsApi.APIsearchStreamRequest builder = apiInstance.tweets().searchStream(); + if (config.tweetFields != null) { + log.info("Setting up tweet fields = {}", config.tweetFields); + builder = builder.tweetFields(Arrays.stream(config.tweetFields.split(",")).collect(Collectors.toSet())); + } + twitterStream = builder.execute(RETRIES); + } else { + log.info("Starting tweets sample stream."); + TweetsApi.APIsampleStreamRequest builder = apiInstance.tweets().sampleStream(); + if (config.tweetFields != null) { + log.info("Setting up tweet fields = {}", config.tweetFields); + builder = builder.tweetFields(Arrays.stream(config.tweetFields.split(",")).collect(Collectors.toSet())); + } + twitterStream = builder.execute(RETRIES); } - twitterStream.shutdown(); + return twitterStream; } - @Override - public void onStatus(Status status) { - try { - Struct keyStruct = new Struct(StatusConverter.STATUS_SCHEMA_KEY); - Struct valueStruct = new Struct(StatusConverter.STATUS_SCHEMA); - - StatusConverter.convertKey(status, keyStruct); - StatusConverter.convert(status, valueStruct); - - Map sourcePartition = ImmutableMap.of(); - Map sourceOffset = ImmutableMap.of(); - - 
SourceRecord record = new SourceRecord(sourcePartition, sourceOffset, this.config.topic, StatusConverter.STATUS_SCHEMA_KEY, keyStruct, StatusConverter.STATUS_SCHEMA, valueStruct); - this.messageQueue.add(record); - } catch (Exception ex) { - if (log.isErrorEnabled()) { - log.error("Exception thrown", ex); + private void setFilterRule(TwitterApi apiInstance) throws ApiException { + List currentRules = apiInstance.tweets().getRules().execute(RETRIES).getData(); + if (currentRules != null && !currentRules.isEmpty()) { + List currentNotMatchingRulesIds = currentRules.stream() + .filter(rule -> !rule.getValue().equals(config.filterRule)) + .map(Rule::getId).collect(Collectors.toList()); + if (!currentNotMatchingRulesIds.isEmpty()) { + DeleteRulesRequest delete = new DeleteRulesRequest().delete(new DeleteRulesRequestDelete().ids(currentNotMatchingRulesIds)); + AddOrDeleteRulesResponse deleteRulesResult = apiInstance.tweets().addOrDeleteRules(new AddOrDeleteRulesRequest(delete)).execute(RETRIES); + log.debug("Delete rules result: " + deleteRulesResult); } } + if (currentRules == null || currentRules.stream().noneMatch(rule -> rule.getValue().equals(config.filterRule))) { + RuleNoId rule = new RuleNoId().value(config.filterRule); + AddRulesRequest add = new AddRulesRequest().addAddItem(rule); + AddOrDeleteRulesResponse addRulesResult = apiInstance.tweets().addOrDeleteRules(new AddOrDeleteRulesRequest(add)).execute(RETRIES); + log.debug("Add rules result: " + addRulesResult); + } else { + log.debug("Filter rule already configured"); + } } - @Override - public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { - if (!this.config.processDeletes) { - return; + private void processFilteredStreamingTweetResponse(String line) { + try { + FilteredStreamingTweetResponse tweetResponse = FilteredStreamingTweetResponse.fromJson(line); + if (tweetResponse != null) { + onTweet(tweetResponse.getData()); + } + } catch (Exception ex) { + log.error("Exception during 
FilteredStreamingTweetResponse processing - will be skipped", ex); } + } + private void processGet2TweetsSampleStreamResponse(String line) { try { - Struct keyStruct = new Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY); - - StatusConverter.convertKey(statusDeletionNotice, keyStruct); - - Map sourcePartition = ImmutableMap.of(); - Map sourceOffset = ImmutableMap.of(); - - SourceRecord record = new SourceRecord(sourcePartition, sourceOffset, this.config.topic, StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY, keyStruct, null, null); - this.messageQueue.add(record); - } catch (Exception ex) { - if (log.isErrorEnabled()) { - log.error("Exception thrown", ex); + Get2TweetsSampleStreamResponse tweetResponse = Get2TweetsSampleStreamResponse.fromJson(line); + if (tweetResponse != null) { + onTweet(tweetResponse.getData()); } + } catch (Exception ex) { + log.error("Exception during Get2TweetsSampleStreamResponse processing - will be skipped", ex); } } @Override - public void onTrackLimitationNotice(int i) { - + public List poll() throws InterruptedException { + return messageQueue.getBatch(); } @Override - public void onScrubGeo(long l, long l1) { - + public void stop() { + log.info("Shutting down twitter stream."); + running = false; } - @Override - public void onStallWarning(StallWarning stallWarning) { - if (log.isWarnEnabled()) { - log.warn("code = '{}' percentFull = '{}' - {}", stallWarning.getCode(), stallWarning.getPercentFull(), stallWarning.getMessage()); - } - } + public void onTweet(Tweet tweet) { + try { + Struct value = TweetConverter.convert(tweet); - @Override - public void onException(Exception e) { - if (log.isErrorEnabled()) { - log.error("onException", e); + Map sourcePartition = ImmutableMap.of(); + Map sourceOffset = ImmutableMap.of(); + + SourceRecord record = new SourceRecord(sourcePartition, sourceOffset, config.topic, Schema.STRING_SCHEMA, tweet.getId(), TweetConverter.TWEET_SCHEMA, value); + messageQueue.add(record); + } catch (Exception 
ex) { + log.error("Exception thrown", ex); } } -} \ No newline at end of file + +} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/DocumentationTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/DocumentationTest.java deleted file mode 100644 index 6eed779..0000000 --- a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/DocumentationTest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.github.jcustenborder.kafka.connect.twitter; - -import com.github.jcustenborder.kafka.connect.utils.BaseDocumentationTest; -import org.apache.kafka.connect.data.Schema; - -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -public class DocumentationTest extends BaseDocumentationTest { - static Schema schema(Field field) { - try { - return (Schema) field.get(null); - } catch (IllegalAccessException e) { - throw new IllegalStateException(e); - } - } - - @Override - protected List schemas() { - List schemas = Arrays.stream(StatusConverter.class.getFields()) - .filter(field -> Modifier.isFinal(field.getModifiers())) - .filter(field -> Modifier.isStatic(field.getModifiers())) - .filter(field -> Schema.class.equals(field.getType())) - .map(DocumentationTest::schema) - .collect(Collectors.toList()); - return schemas; - } -} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/SchemaGeneratorTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/SchemaGeneratorTest.java deleted file mode 100644 index a898198..0000000 --- a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/SchemaGeneratorTest.java +++ /dev/null @@ -1,157 +0,0 @@ -package com.github.jcustenborder.kafka.connect.twitter; - -import com.google.common.base.CaseFormat; -import org.junit.jupiter.api.Test; -import org.reflections.Reflections; -import org.reflections.util.ClasspathHelper; -import 
org.reflections.util.ConfigurationBuilder; -import twitter4j.MediaEntity; -import twitter4j.TweetEntity; - -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -public class SchemaGeneratorTest { - - List> list(Reflections reflections, Class cls) { - List> classes = reflections.getSubTypesOf(cls) - .stream() - .filter(aClass -> Modifier.isInterface(aClass.getModifiers())) - .collect(Collectors.toList()); - classes.sort(Comparator.comparing(Class::getName)); - return classes; - } - - String schema(Class cls) { - String result; - - if (String.class.equals(cls)) { - result = "SchemaBuilder.string().optional().doc(\"\").build()"; - } else if (int.class.equals(cls)) { - result = "SchemaBuilder.int32().optional().doc(\"\").build()"; - } else if (long.class.equals(cls)) { - result = "SchemaBuilder.int64().optional().doc(\"\").build()"; - } else if (cls.isArray()) { - String childSchema = schema(cls.getComponentType()); - result = String.format("SchemaBuilder.array(%s).optional().doc(\"\").build()", childSchema); - } else if (Map.class.isAssignableFrom(cls)) { - result = "SchemaBuilder.map(Schema.STRING_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE)"; - - } else { - result = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getSimpleName()).replace('$', '_'); - } - - - return result; - } - - void processClass(Class cls, StringBuilder builder) { - - final String schemaConstantName; - final String schemaName; - final String typeName; - final String convertMethodName; - - if (null == cls.getDeclaringClass()) { - schemaConstantName = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getSimpleName()); - schemaName = String.format("com.github.jcustenborder.kafka.connect.twitter.%s", cls.getSimpleName()); - typeName = cls.getSimpleName(); - 
convertMethodName = String.format("convert%s", cls.getSimpleName()); - } else { - schemaConstantName = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getDeclaringClass().getSimpleName() + cls.getSimpleName()); - typeName = String.format("%s.%s", cls.getDeclaringClass().getSimpleName(), cls.getSimpleName()); - schemaName = String.format("com.github.jcustenborder.kafka.connect.twitter.%s.%s", cls.getSimpleName(), cls.getDeclaringClass().getSimpleName()); - convertMethodName = String.format("convert%s%s", cls.getDeclaringClass().getSimpleName(), cls.getSimpleName()); - } - - - builder.append(String.format("public static final Schema %s =SchemaBuilder.struct()\n", schemaConstantName)); - builder.append(String.format(" .name(\"%s\")\n", schemaName)); - builder.append(" .doc(\"\")\n"); - - Set methods = new HashSet<>(); - for (Method method : cls.getMethods()) { - String methodName = method.getName().replace("get", ""); - if (!methods.add(methodName)) { - continue; - } - String expectedSchema = schema(method.getReturnType()); - builder.append(String.format(" .field(\"%s\", %s)\n", methodName, expectedSchema)); - } - builder.append(" .build();\n\n"); - - methods.clear(); - String variableName = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_CAMEL, cls.getSimpleName()); - builder.append(String.format("static Struct %s(%s %s) {\n", convertMethodName, typeName, variableName)); - builder.append(String.format(" return new Struct(%s)", schemaConstantName)); - for (Method method : cls.getMethods()) { - String methodName = method.getName().replace("get", ""); - if (!methods.add(methodName)) { - continue; - } - builder.append(String.format("\n .put(\"%s\", %s.%s())", methodName, variableName, method.getName())); - } - builder.append(";\n }\n"); - - builder.append("\n"); - builder.append(String.format("public static List convert(%s[] items) {\n", typeName)); - builder.append(" List result = new ArrayList<>();\n"); - builder.append(" if(null==items) {\n"); - 
builder.append(" return result;\n"); - builder.append(" }\n"); - builder.append(String.format(" for(%s item: items) {\n", typeName)); - builder.append(String.format(" Struct struct = %s(item);\n", convertMethodName)); - builder.append(" result.add(struct);\n"); - builder.append(" }\n"); - builder.append(" return result;\n"); - builder.append("}\n"); - -// } -// public static List convert(UserMentionEntity[] userMentionEntities) { -// List result = new ArrayList<>(); -// if(null==userMentionEntities) { -// return result; -// } -// for(UserMentionEntity item: userMentionEntities) { -// Struct struct = convertUserMentionEntity(item); -// result.add(struct); -// } -// return result; -// } - - - } - - @Test - public void tweetEntities() { - Reflections reflections = new Reflections(new ConfigurationBuilder() - .setUrls(ClasspathHelper.forJavaClassPath()) - .forPackages(TweetEntity.class.getPackage().getName()) - ); - - List> allClasses = new ArrayList<>(); - List> classes = list(reflections, TweetEntity.class); - allClasses.add(MediaEntity.Variant.class); - allClasses.add(MediaEntity.Size.class); - allClasses.addAll(classes); - - - for (Class cls : allClasses) { - StringBuilder builder = new StringBuilder(); - processClass(cls, builder); - - System.out.println(builder); - } - - - } - - -} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverterTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverterTest.java deleted file mode 100644 index 6ca4b5c..0000000 --- a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverterTest.java +++ /dev/null @@ -1,327 +0,0 @@ -/** - * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.github.jcustenborder.kafka.connect.twitter; - -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.Test; -import twitter4j.GeoLocation; -import twitter4j.Place; -import twitter4j.Status; -import twitter4j.StatusDeletionNotice; -import twitter4j.User; - -import java.util.ArrayList; -import java.util.Date; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class StatusConverterTest { - - public static GeoLocation mockGeoLocation() { - return new GeoLocation(30.2672D, 97.7431D); - } - - public static Place mockPlace() { - Place place = mock(Place.class); - when(place.getName()).thenReturn("Example place"); - when(place.getStreetAddress()).thenReturn("123 Example St"); - when(place.getCountryCode()).thenReturn("US"); - when(place.getId()).thenReturn("asdfaisdfasd"); - when(place.getCountry()).thenReturn("United States"); - when(place.getPlaceType()).thenReturn("ADF"); - when(place.getURL()).thenReturn("http://www.example.com/"); - when(place.getFullName()).thenReturn("Example place"); - return place; - } - - public static Status mockStatus() { - Status status = mock(Status.class); - User user = mockUser(); - GeoLocation geoLocation = mockGeoLocation(); - Place place = mockPlace(); - - when(status.getCreatedAt()).thenReturn(new Date(1471667709998L)); - when(status.getId()).thenReturn(9823452L); - 
when(status.getText()).thenReturn("This is a twit"); - when(status.getSource()).thenReturn("foo"); - when(status.isTruncated()).thenReturn(false); - when(status.getInReplyToStatusId()).thenReturn(2345234L); - when(status.getInReplyToUserId()).thenReturn(8756786L); - when(status.getInReplyToScreenName()).thenReturn("foo"); - when(status.getGeoLocation()).thenReturn(geoLocation); - when(status.getPlace()).thenReturn(place); - when(status.isFavorited()).thenReturn(true); - when(status.isRetweeted()).thenReturn(false); - when(status.getFavoriteCount()).thenReturn(1234); - when(status.getUser()).thenReturn(user); - when(status.isRetweet()).thenReturn(false); - when(status.getContributors()).thenReturn(new long[]{431234L, 986789678L}); - when(status.getRetweetCount()).thenReturn(1234); - when(status.isRetweetedByMe()).thenReturn(false); - when(status.getCurrentUserRetweetId()).thenReturn(653456345L); - when(status.isPossiblySensitive()).thenReturn(false); - when(status.getLang()).thenReturn("en-US"); - when(status.getWithheldInCountries()).thenReturn(new String[]{"CN"}); - - return status; - } - - public static User mockUser() { - User user = mock(User.class); - - when(user.getId()).thenReturn(1234L); - when(user.getName()).thenReturn("Example User"); - when(user.getScreenName()).thenReturn("example"); - when(user.getLocation()).thenReturn("Austin, TX"); - when(user.getDescription()).thenReturn("This is a description"); - when(user.isContributorsEnabled()).thenReturn(true); - when(user.getProfileImageURL()).thenReturn("http://i.twittercdn.com/profile.jpg"); - when(user.getBiggerProfileImageURL()).thenReturn("http://i.twittercdn.com/biggerprofile.jpg"); - when(user.getMiniProfileImageURL()).thenReturn("http://i.twittercdn.com/mini.profile.jpg"); - when(user.getOriginalProfileImageURL()).thenReturn("http://i.twittercdn.com/original.profile.jpg"); - when(user.getProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/profile.jpg"); - 
when(user.getBiggerProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/bigger.profile.jpg"); - when(user.getMiniProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/mini.profile.jpg"); - when(user.getOriginalProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/original.profile.jpg"); - when(user.isDefaultProfileImage()).thenReturn(true); - when(user.getURL()).thenReturn("https://www.twitter.com/example"); - when(user.isProtected()).thenReturn(false); - when(user.getFollowersCount()).thenReturn(54245); - when(user.getProfileBackgroundColor()).thenReturn("#ffffff"); - when(user.getProfileTextColor()).thenReturn("#000000"); - when(user.getProfileLinkColor()).thenReturn("#aaaaaa"); - when(user.getProfileSidebarFillColor()).thenReturn("#333333"); - when(user.getProfileSidebarBorderColor()).thenReturn("#555555"); - when(user.isProfileUseBackgroundImage()).thenReturn(true); - when(user.isDefaultProfile()).thenReturn(true); - when(user.isShowAllInlineMedia()).thenReturn(true); - when(user.getFriendsCount()).thenReturn(452345234); - when(user.getCreatedAt()).thenReturn(new Date(1471665653209L)); - when(user.getFavouritesCount()).thenReturn(12341); - when(user.getUtcOffset()).thenReturn(8); - when(user.getTimeZone()).thenReturn("UTC"); - when(user.getProfileBackgroundImageURL()).thenReturn("https://i.twittercdn.com/original.background.jpg"); - when(user.getProfileBackgroundImageUrlHttps()).thenReturn("https://i.twittercdn.com/original.background.jpg"); - when(user.getProfileBannerURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - when(user.getProfileBannerRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - when(user.getProfileBannerIPadURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - when(user.getProfileBannerIPadRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - when(user.getProfileBannerMobileURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - 
when(user.getProfileBannerMobileRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); - when(user.isProfileBackgroundTiled()).thenReturn(false); - when(user.getLang()).thenReturn("en-us"); - when(user.getStatusesCount()).thenReturn(543); - when(user.isGeoEnabled()).thenReturn(true); - when(user.isVerified()).thenReturn(true); - when(user.isTranslator()).thenReturn(false); - when(user.getListedCount()).thenReturn(4); - when(user.isFollowRequestSent()).thenReturn(false); - when(user.getWithheldInCountries()).thenReturn(new String[]{"CN"}); - - - return user; - } - - public static StatusDeletionNotice mockStatusDeletionNotice() { - StatusDeletionNotice statusDeletionNotice = mock(StatusDeletionNotice.class); - when(statusDeletionNotice.getStatusId()).thenReturn(1234565345L); - when(statusDeletionNotice.getUserId()).thenReturn(6543456354L); - return statusDeletionNotice; - } - - List convert(long[] values) { - List list = new ArrayList<>(); - for (Long l : values) { - list.add(l); - } - return list; - } - - List convert(String[] values) { - List list = new ArrayList<>(); - for (String l : values) { - list.add(l); - } - return list; - } - - void assertStatus(Status status, Struct struct) { - assertEquals(status.getCreatedAt(), struct.get("CreatedAt"), "CreatedAt does not match."); - assertEquals(status.getId(), struct.get("Id"), "Id does not match."); - assertEquals(status.getText(), struct.get("Text"), "Text does not match."); - assertEquals(status.getSource(), struct.get("Source"), "Source does not match."); - assertEquals(status.isTruncated(), struct.get("Truncated"), "Truncated does not match."); - assertEquals(status.getInReplyToStatusId(), struct.get("InReplyToStatusId"), "InReplyToStatusId does not match."); - assertEquals(status.getInReplyToUserId(), struct.get("InReplyToUserId"), "InReplyToUserId does not match."); - assertEquals(status.getInReplyToScreenName(), struct.get("InReplyToScreenName"), "InReplyToScreenName does not match."); - 
assertEquals(status.isFavorited(), struct.get("Favorited"), "Favorited does not match."); - assertEquals(status.isRetweeted(), struct.get("Retweeted"), "Retweeted does not match."); - assertEquals(status.getFavoriteCount(), struct.get("FavoriteCount"), "FavoriteCount does not match."); - assertEquals(status.isRetweet(), struct.get("Retweet"), "Retweet does not match."); - assertEquals(status.getRetweetCount(), struct.get("RetweetCount"), "RetweetCount does not match."); - assertEquals(status.isRetweetedByMe(), struct.get("RetweetedByMe"), "RetweetedByMe does not match."); - assertEquals(status.getCurrentUserRetweetId(), struct.get("CurrentUserRetweetId"), "CurrentUserRetweetId does not match."); - assertEquals(status.isPossiblySensitive(), struct.get("PossiblySensitive"), "PossiblySensitive does not match."); - assertEquals(status.getLang(), struct.get("Lang"), "Lang does not match."); - - assertUser(status.getUser(), struct.getStruct("User")); - assertPlace(status.getPlace(), struct.getStruct("Place")); - assertGeoLocation(status.getGeoLocation(), struct.getStruct("GeoLocation")); - - assertEquals(convert(status.getContributors()), struct.getArray("Contributors"), "Contributors does not match."); - assertEquals(convert(status.getWithheldInCountries()), struct.get("WithheldInCountries"), "WithheldInCountries does not match."); - } - - void assertGeoLocation(GeoLocation geoLocation, Struct struct) { - assertEquals(struct.getFloat64("Latitude"), 1, geoLocation.getLatitude()); - assertEquals(struct.getFloat64("Longitude"), 1, geoLocation.getLongitude()); - } - - void assertPlace(Place place, Struct struct) { - assertEquals(place.getName(), struct.get("Name"), "Name does not match."); - assertEquals(place.getStreetAddress(), struct.get("StreetAddress"), "StreetAddress does not match."); - assertEquals(place.getCountryCode(), struct.get("CountryCode"), "CountryCode does not match."); - assertEquals(place.getId(), struct.get("Id"), "Id does not match."); - 
assertEquals(place.getCountry(), struct.get("Country"), "Country does not match."); - assertEquals(place.getPlaceType(), struct.get("PlaceType"), "PlaceType does not match."); - assertEquals(place.getURL(), struct.get("URL"), "URL does not match."); - assertEquals(place.getFullName(), struct.get("FullName"), "FullName does not match."); - } - - void assertUser(User user, Struct struct) { - assertNotNull(struct, "struct should not be null."); - assertEquals(user.getId(), struct.get("Id"), "Id does not match."); - assertEquals(user.getName(), struct.get("Name"), "Name does not match."); - assertEquals(user.getScreenName(), struct.get("ScreenName"), "ScreenName does not match."); - assertEquals(user.getLocation(), struct.get("Location"), "Location does not match."); - assertEquals(user.getDescription(), struct.get("Description"), "Description does not match."); - assertEquals(user.isContributorsEnabled(), struct.get("ContributorsEnabled"), "ContributorsEnabled does not match."); - assertEquals(user.getProfileImageURL(), struct.get("ProfileImageURL"), "ProfileImageURL does not match."); - assertEquals(user.getBiggerProfileImageURL(), struct.get("BiggerProfileImageURL"), "BiggerProfileImageURL does not match."); - assertEquals(user.getMiniProfileImageURL(), struct.get("MiniProfileImageURL"), "MiniProfileImageURL does not match."); - assertEquals(user.getOriginalProfileImageURL(), struct.get("OriginalProfileImageURL"), "OriginalProfileImageURL does not match."); - assertEquals(user.getProfileImageURLHttps(), struct.get("ProfileImageURLHttps"), "ProfileImageURLHttps does not match."); - assertEquals(user.getBiggerProfileImageURLHttps(), struct.get("BiggerProfileImageURLHttps"), "BiggerProfileImageURLHttps does not match."); - assertEquals(user.getMiniProfileImageURLHttps(), struct.get("MiniProfileImageURLHttps"), "MiniProfileImageURLHttps does not match."); - assertEquals(user.getOriginalProfileImageURLHttps(), struct.get("OriginalProfileImageURLHttps"), 
"OriginalProfileImageURLHttps does not match."); - assertEquals(user.isDefaultProfileImage(), struct.get("DefaultProfileImage"), "DefaultProfileImage does not match."); - assertEquals(user.getURL(), struct.get("URL"), "URL does not match."); - assertEquals(user.isProtected(), struct.get("Protected"), "Protected does not match."); - assertEquals(user.getFollowersCount(), struct.get("FollowersCount"), "FollowersCount does not match."); - assertEquals(user.getProfileBackgroundColor(), struct.get("ProfileBackgroundColor"), "ProfileBackgroundColor does not match."); - assertEquals(user.getProfileTextColor(), struct.get("ProfileTextColor"), "ProfileTextColor does not match."); - assertEquals(user.getProfileLinkColor(), struct.get("ProfileLinkColor"), "ProfileLinkColor does not match."); - assertEquals(user.getProfileSidebarFillColor(), struct.get("ProfileSidebarFillColor"), "ProfileSidebarFillColor does not match."); - assertEquals(user.getProfileSidebarBorderColor(), struct.get("ProfileSidebarBorderColor"), "ProfileSidebarBorderColor does not match."); - assertEquals(user.isProfileUseBackgroundImage(), struct.get("ProfileUseBackgroundImage"), "ProfileUseBackgroundImage does not match."); - assertEquals(user.isDefaultProfile(), struct.get("DefaultProfile"), "DefaultProfile does not match."); - assertEquals(user.isShowAllInlineMedia(), struct.get("ShowAllInlineMedia"), "ShowAllInlineMedia does not match."); - assertEquals(user.getFriendsCount(), struct.get("FriendsCount"), "FriendsCount does not match."); - assertEquals(user.getCreatedAt(), struct.get("CreatedAt"), "CreatedAt does not match."); - assertEquals(user.getFavouritesCount(), struct.get("FavouritesCount"), "FavouritesCount does not match."); - assertEquals(user.getUtcOffset(), struct.get("UtcOffset"), "UtcOffset does not match."); - assertEquals(user.getTimeZone(), struct.get("TimeZone"), "TimeZone does not match."); - assertEquals(user.getProfileBackgroundImageURL(), struct.get("ProfileBackgroundImageURL"), 
"ProfileBackgroundImageURL does not match."); - assertEquals(user.getProfileBackgroundImageUrlHttps(), struct.get("ProfileBackgroundImageUrlHttps"), "ProfileBackgroundImageUrlHttps does not match."); - assertEquals(user.getProfileBannerURL(), struct.get("ProfileBannerURL"), "ProfileBannerURL does not match."); - assertEquals(user.getProfileBannerRetinaURL(), struct.get("ProfileBannerRetinaURL"), "ProfileBannerRetinaURL does not match."); - assertEquals(user.getProfileBannerIPadURL(), struct.get("ProfileBannerIPadURL"), "ProfileBannerIPadURL does not match."); - assertEquals(user.getProfileBannerIPadRetinaURL(), struct.get("ProfileBannerIPadRetinaURL"), "ProfileBannerIPadRetinaURL does not match."); - assertEquals(user.getProfileBannerMobileURL(), struct.get("ProfileBannerMobileURL"), "ProfileBannerMobileURL does not match."); - assertEquals(user.getProfileBannerMobileRetinaURL(), struct.get("ProfileBannerMobileRetinaURL"), "ProfileBannerMobileRetinaURL does not match."); - assertEquals(user.isProfileBackgroundTiled(), struct.get("ProfileBackgroundTiled"), "ProfileBackgroundTiled does not match."); - assertEquals(user.getLang(), struct.get("Lang"), "Lang does not match."); - assertEquals(user.getStatusesCount(), struct.get("StatusesCount"), "StatusesCount does not match."); - assertEquals(user.isGeoEnabled(), struct.get("GeoEnabled"), "GeoEnabled does not match."); - assertEquals(user.isVerified(), struct.get("Verified"), "Verified does not match."); - assertEquals(user.isTranslator(), struct.get("Translator"), "Translator does not match."); - assertEquals(user.getListedCount(), struct.get("ListedCount"), "ListedCount does not match."); - assertEquals(user.isFollowRequestSent(), struct.get("FollowRequestSent"), "FollowRequestSent does not match."); - } - - void assertKey(Status status, Struct struct) { - assertEquals(status.getId(), struct.get("Id"), "Id does not match."); - } - - @Test - public void convertStatus() { - Status status = mockStatus(); - Struct struct 
= new Struct(StatusConverter.STATUS_SCHEMA); - StatusConverter.convert(status, struct); - assertStatus(status, struct); - } - - @Test - public void convertUser() { - User user = mockUser(); - Struct struct = new Struct(StatusConverter.USER_SCHEMA); - StatusConverter.convert(user, struct); - assertUser(user, struct); - } - - @Test - public void convertPlace() { - Place place = mockPlace(); - Struct struct = new Struct(StatusConverter.PLACE_SCHEMA); - StatusConverter.convert(place, struct); - assertPlace(place, struct); - } - - @Test - public void convertGeoLocation() { - GeoLocation geoLocation = mockGeoLocation(); - Struct struct = new Struct(StatusConverter.GEO_LOCATION_SCHEMA); - StatusConverter.convert(geoLocation, struct); - assertGeoLocation(geoLocation, struct); - } - - @Test - public void convertStatusKey() { - Status status = mockStatus(); - Struct struct = new Struct(StatusConverter.STATUS_SCHEMA_KEY); - StatusConverter.convertKey(status, struct); - assertKey(status, struct); - } - - void assertStatusDeletionNotice(StatusDeletionNotice statusDeletionNotice, Struct struct) { - assertEquals(statusDeletionNotice.getStatusId(), struct.get("StatusId"), "StatusId does not match."); - assertEquals(statusDeletionNotice.getUserId(), struct.get("UserId"), "UserId does not match."); - } - - void assertStatusDeletionNoticeKey(StatusDeletionNotice statusDeletionNotice, Struct struct) { - assertEquals(statusDeletionNotice.getStatusId(), struct.get("StatusId"), "StatusId does not match."); - } - - @Test - public void convertStatusDeletionNotice() { - StatusDeletionNotice statusDeletionNotice = mockStatusDeletionNotice(); - Struct struct = new Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE); - StatusConverter.convert(statusDeletionNotice, struct); - assertStatusDeletionNotice(statusDeletionNotice, struct); - } - - @Test - public void convertKeyStatusDeletionNotice() { - StatusDeletionNotice statusDeletionNotice = mockStatusDeletionNotice(); - Struct struct = new 
Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY); - StatusConverter.convertKey(statusDeletionNotice, struct); - assertStatusDeletionNoticeKey(statusDeletionNotice, struct); - } -} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverterTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverterTest.java new file mode 100644 index 0000000..3b987ad --- /dev/null +++ b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TweetConverterTest.java @@ -0,0 +1,103 @@ +/** + * Copyright © 2022 Arek Burdach (arek.burdach@gmail.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.github.jcustenborder.kafka.connect.twitter; + +import com.twitter.clientlib.JSON; +import com.twitter.clientlib.model.FullTextEntities; +import com.twitter.clientlib.model.HashtagEntity; +import com.twitter.clientlib.model.Point; +import com.twitter.clientlib.model.Tweet; +import com.twitter.clientlib.model.TweetEditControls; +import com.twitter.clientlib.model.TweetGeo; +import com.twitter.clientlib.model.UrlEntity; +import io.confluent.connect.avro.AvroConverter; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import org.apache.commons.io.IOUtils; +import org.apache.kafka.connect.data.Struct; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TweetConverterTest { + + { + // init twitter json deserializers + new JSON(); + } + + @Test + public void shouldConvertTweetWithPhoto() throws IOException { + Tweet tweet = Tweet.fromJson(IOUtils.resourceToString("/sample_tweets/with-photo.json", StandardCharsets.UTF_8)); + Struct result = TweetConverter.convert(tweet); + assertEquals("How times have changed https://t.co/gCxUkZ4kZC", result.getString(Tweet.SERIALIZED_NAME_TEXT)); + assertEquals(5, result + .getStruct(Tweet.SERIALIZED_NAME_EDIT_CONTROLS) + .getInt32(TweetEditControls.SERIALIZED_NAME_EDITS_REMAINING)); + assertEquals("https://t.co/gCxUkZ4kZC", result + .getStruct(Tweet.SERIALIZED_NAME_ENTITIES) + .getArray(FullTextEntities.SERIALIZED_NAME_URLS) + .get(0) + .getString(UrlEntity.SERIALIZED_NAME_URL)); + assertEquals("everyone", result.getString(Tweet.SERIALIZED_NAME_REPLY_SETTINGS)); + } + + @Test + public void shouldConvertTweetWithReference() throws IOException { + Tweet tweet = Tweet.fromJson(IOUtils.resourceToString("/sample_tweets/with-reference.json", StandardCharsets.UTF_8)); + 
TweetConverter.convert(tweet); + } + + @Test + public void shouldConvertReplyTweet() throws IOException { + Tweet tweet = Tweet.fromJson(IOUtils.resourceToString("/sample_tweets/reply.json", StandardCharsets.UTF_8)); + TweetConverter.convert(tweet); + } + + @Test + public void shouldConvertTweetWithHashtag() throws IOException { + Tweet tweet = Tweet.fromJson(IOUtils.resourceToString("/sample_tweets/with-hashtag.json", StandardCharsets.UTF_8)); + Struct result = TweetConverter.convert(tweet); + assertEquals("Bitcoin", result + .getStruct(Tweet.SERIALIZED_NAME_ENTITIES) + .getArray(FullTextEntities.SERIALIZED_NAME_HASHTAGS) + .get(0) + .getString(HashtagEntity.SERIALIZED_NAME_TAG)); + } + + @Test + public void shouldConvertDecimalToDesiredScale() { + Tweet tweet = new Tweet(); + tweet.setId("foo"); + tweet.setText("foo"); + TweetGeo geo = new TweetGeo(); + Point point = new Point(); + point.setType(Point.TypeEnum.POINT); + point.setCoordinates(Arrays.asList(new BigDecimal("12.12345678"), new BigDecimal("12.1234567"))); + geo.setCoordinates(point); + tweet.setGeo(geo); + Struct result = TweetConverter.convert(tweet); + AvroConverter converter = new AvroConverter(new MockSchemaRegistryClient()); + converter.configure(Collections.singletonMap("schema.registry.url", "http://localhost:8080/not_used"), false); + converter.fromConnectData("foo", TweetConverter.TWEET_SCHEMA, result); + } + +} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorTest.java deleted file mode 100644 index aec60af..0000000 --- a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.github.jcustenborder.kafka.connect.twitter; - - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DynamicTest; -import org.junit.jupiter.api.TestFactory; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.DynamicTest.dynamicTest; - -public class TwitterSourceConnectorTest { - - TwitterSourceConnector connector; - Map defaultSettings; - - @BeforeEach - public void setup() { - this.connector = new TwitterSourceConnector(); - this.defaultSettings = new LinkedHashMap<>(); - this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_ACCESS_TOKEN_CONF, "xxxxxx"); - this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_SECRET_KEY_CONF, "xxxxxx"); - this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_CONSUMER_KEY_CONF, "xxxxxx"); - this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF, "xxxxxx"); - this.defaultSettings.put(TwitterSourceConnectorConfig.KAFKA_STATUS_TOPIC_CONF, "xxxxxx"); - this.defaultSettings.put(TwitterSourceConnectorConfig.PROCESS_DELETES_CONF, "false"); - - } - - List> expectedSettings(List... 
keywords) { - List> result = new ArrayList<>(); - for (List keywordSet : keywords) { - Map settings = new LinkedHashMap<>(this.defaultSettings); - settings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, Joiner.on(',').join(keywordSet)); - result.add(settings); - } - return result; - } - - @TestFactory - public Stream taskConfigs() { - - Map>> testCases = ImmutableMap.of( - 1, expectedSettings(Arrays.asList("one", "two", "three")), - 2, expectedSettings(Arrays.asList("one", "three"), Arrays.asList("two")), - 3, expectedSettings(Arrays.asList("one"), Arrays.asList("two"), Arrays.asList("three")) - ); - - return testCases.entrySet().stream() - .map(e -> dynamicTest(e.getKey().toString(), () -> { - this.defaultSettings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, "one,two,three"); - this.connector.start(this.defaultSettings); - List> taskConfigs = this.connector.taskConfigs(e.getKey()); - assertEquals( - e.getValue(), - taskConfigs - ); - })); - - - } - -} diff --git a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTaskTest.java b/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTaskTest.java deleted file mode 100644 index 1c7e083..0000000 --- a/src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTaskTest.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.github.jcustenborder.kafka.connect.twitter; - - -import org.junit.jupiter.api.Test; - -public class TwitterSourceTaskTest { - @Test - public void test() { - // Congrats on a passing test! - } -} \ No newline at end of file diff --git a/src/test/resources/sample_tweets/reply.json b/src/test/resources/sample_tweets/reply.json new file mode 100644 index 0000000..fd795f1 --- /dev/null +++ b/src/test/resources/sample_tweets/reply.json @@ -0,0 +1,153 @@ +{ + "id": "1589784924004884480", + "created_at": "2022-11-08T01:00:37.000Z", + "in_reply_to_user_id": "44196397", + "public_metrics": { + "retweet_count": 60, + "reply_count": 17, + "like_count": 1178, + "quote_count": 2 + }, + "entities": { + "urls": [ + { + "start": 10, + "end": 33, + "url": "https://t.co/W6qgqw4O0V", + "expanded_url": "https://twitter.com/KMAC_LA/status/1589784924004884480/photo/1", + "display_url": "pic.twitter.com/W6qgqw4O0V", + "media_key": "3_1589784920280297472" + } + ], + "mentions": [ + { + "start": 0, + "end": 9, + "username": "elonmusk", + "id": "44196397" + } + ] + }, + "attachments": { + "media_keys": [ + "3_1589784920280297472" + ] + }, + "edit_history_tweet_ids": [ + "1589784924004884480" + ], + "text": "@elonmusk https://t.co/W6qgqw4O0V", + "context_annotations": [ + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557696848252391426", + "name": "Financial Services Business", + "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to Banks, Credit cards, Insurance, Investments, Stocks " + } + }, + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557697333571112960", + "name": "Technology Business", + "description": "Brands, companies, advertisers and every 
non-person handle with the profit intent related to softwares, apps, communication equipments, hardwares" + } + }, + { + "domain": { + "id": "10", + "name": "Person", + "description": "Named people in the world like Nelson Mandela" + }, + "entity": { + "id": "808713037230157824", + "name": "Elon Musk", + "description": "Elon Musk" + } + }, + { + "domain": { + "id": "65", + "name": "Interests and Hobbies Vertical", + "description": "Top level interests and hobbies groupings, like Food or Travel" + }, + "entity": { + "id": "781974596148793345", + "name": "Business & finance" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "857878777191211008", + "name": "Leadership", + "description": "Leadership" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "808713037230157824", + "name": "Elon Musk", + "description": "Elon Musk" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1091420346660470784", + "name": "Tech personalities", + "description": "Tech Professionals" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. 
" + }, + "entity": { + "id": "1166406108623163392", + "name": "Business personalities" + } + } + ], + "possibly_sensitive": false, + "source": "TweetDeck", + "author_id": "2302290235", + "edit_controls": { + "edits_remaining": 5, + "is_edit_eligible": false, + "editable_until": "2022-11-08T01:30:37.000Z" + }, + "referenced_tweets": [ + { + "type": "replied_to", + "id": "1589784884666859520" + } + ], + "conversation_id": "1589784134691741696", + "reply_settings": "everyone", + "lang": "qme" +} diff --git a/src/test/resources/sample_tweets/with-hashtag.json b/src/test/resources/sample_tweets/with-hashtag.json new file mode 100644 index 0000000..8aa1e10 --- /dev/null +++ b/src/test/resources/sample_tweets/with-hashtag.json @@ -0,0 +1,157 @@ +{ + "id": "1590294109771943938", + "created_at": "2022-11-09T10:43:56.000Z", + "public_metrics": { + "retweet_count": 20, + "reply_count": 19, + "like_count": 161, + "quote_count": 3 + }, + "entities": { + "urls": [ + { + "start": 47, + "end": 70, + "url": "https://t.co/NYvR5ronJP", + "expanded_url": "https://twitter.com/NewCryptoGlobe/status/1590294109771943938/photo/1", + "display_url": "pic.twitter.com/NYvR5ronJP", + "media_key": "3_1590294103606136832" + } + ], + "hashtags": [ + { + "start": 38, + "end": 46, + "tag": "Bitcoin" + } + ] + }, + "attachments": { + "media_keys": [ + "3_1590294103606136832" + ] + }, + "edit_history_tweet_ids": [ + "1590294109771943938" + ], + "text": "Już prawie. 
Wytrzymajcie bombelki \uD83D\uDC4A\uD83D\uDCC9\uD83D\uDCC8 #Bitcoin https://t.co/NYvR5ronJP", + "context_annotations": [ + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557696848252391426", + "name": "Financial Services Business", + "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to Banks, Credit cards, Insurance, Investments, Stocks " + } + }, + { + "domain": { + "id": "30", + "name": "Entities [Entity Service]", + "description": "Entity Service top level domain, every item that is in Entity Service should be in this domain" + }, + "entity": { + "id": "1007360414114435072", + "name": "Bitcoin cryptocurrency", + "description": "Bitcoin Cryptocurrency" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "913142676819648512", + "name": "Cryptocurrencies", + "description": "Cryptocurrency" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "913142676819648512", + "name": "Cryptocurrencies", + "description": "Cryptocurrency" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1007360414114435072", + "name": "Bitcoin cryptocurrency", + "description": "Bitcoin Cryptocurrency" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1484181943616884743", + "name": "Cryptocoins" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. 
" + }, + "entity": { + "id": "1491481998862348291", + "name": "Digital asset industry" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1492162686204854274", + "name": "Digital assets & cryptocurrency", + "description": "Cryptocurrency" + } + }, + { + "domain": { + "id": "174", + "name": "Digital Assets & Crypto", + "description": "For cryptocurrency entities" + }, + "entity": { + "id": "1007360414114435072", + "name": "Bitcoin cryptocurrency", + "description": "Bitcoin Cryptocurrency" + } + } + ], + "possibly_sensitive": false, + "source": "Twitter for iPhone", + "author_id": "1474153730887016453", + "edit_controls": { + "edits_remaining": 5, + "is_edit_eligible": false, + "editable_until": "2022-11-09T11:13:56.000Z" + }, + "conversation_id": "1590294109771943938", + "reply_settings": "everyone", + "lang": "pl" +} diff --git a/src/test/resources/sample_tweets/with-photo.json b/src/test/resources/sample_tweets/with-photo.json new file mode 100644 index 0000000..f7eebe6 --- /dev/null +++ b/src/test/resources/sample_tweets/with-photo.json @@ -0,0 +1,138 @@ +{ + "attachments": { + "media_keys": [ + "3_1589631941275418625" + ] + }, + "author_id": "44196397", + "context_annotations": [ + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557696848252391426", + "name": "Financial Services Business", + "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to Banks, Credit cards, Insurance, Investments, Stocks " + } + }, + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557697333571112960", + "name": "Technology Business", + "description": "Brands, companies, 
advertisers and every non-person handle with the profit intent related to softwares, apps, communication equipments, hardwares" + } + }, + { + "domain": { + "id": "10", + "name": "Person", + "description": "Named people in the world like Nelson Mandela" + }, + "entity": { + "id": "808713037230157824", + "name": "Elon Musk", + "description": "Elon Musk" + } + }, + { + "domain": { + "id": "65", + "name": "Interests and Hobbies Vertical", + "description": "Top level interests and hobbies groupings, like Food or Travel" + }, + "entity": { + "id": "781974596148793345", + "name": "Business & finance" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "857878777191211008", + "name": "Leadership", + "description": "Leadership" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "808713037230157824", + "name": "Elon Musk", + "description": "Elon Musk" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1091420346660470784", + "name": "Tech personalities", + "description": "Tech Professionals" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. 
" + }, + "entity": { + "id": "1166406108623163392", + "name": "Business personalities" + } + } + ], + "edit_history_tweet_ids": [ + "1589631946644414467" + ], + "source": "Twitter for iPhone", + "text": "How times have changed https://t.co/gCxUkZ4kZC", + "edit_controls": { + "edits_remaining": 5, + "is_edit_eligible": false, + "editable_until": "2022-11-07T15:22:44.000Z" + }, + "possibly_sensitive": false, + "entities": { + "urls": [ + { + "start": 23, + "end": 46, + "url": "https://t.co/gCxUkZ4kZC", + "expanded_url": "https://twitter.com/elonmusk/status/1589631946644414467/photo/1", + "display_url": "pic.twitter.com/gCxUkZ4kZC", + "media_key": "3_1589631941275418625" + } + ] + }, + "conversation_id": "1589631946644414467", + "public_metrics": { + "retweet_count": 25047, + "reply_count": 16629, + "like_count": 327857, + "quote_count": 3645 + }, + "lang": "en", + "id": "1589631946644414467", + "created_at": "2022-11-07T14:52:44.000Z", + "reply_settings": "everyone" +} diff --git a/src/test/resources/sample_tweets/with-reference.json b/src/test/resources/sample_tweets/with-reference.json new file mode 100644 index 0000000..23e9fe4 --- /dev/null +++ b/src/test/resources/sample_tweets/with-reference.json @@ -0,0 +1,253 @@ +{ + "lang": "en", + "text": "View all destination chargers → https://t.co/lqJMwQTuvH https://t.co/AL9eudo7nH", + "author_id": "13298072", + "conversation_id": "1589602301802844161", + "source": "Twitter for Mac", + "created_at": "2022-11-07T12:54:56.000Z", + "referenced_tweets": [ + { + "type": "quoted", + "id": "1589226433875890176" + } + ], + "entities": { + "urls": [ + { + "start": 32, + "end": 55, + "url": "https://t.co/lqJMwQTuvH", + "expanded_url": "https://tesla.com/findus", + "display_url": "tesla.com/findus", + "images": [ + { + "url": "https://pbs.twimg.com/news_img/1589800450299113472/VSkuFPdr?format=jpg&name=orig", + "width": 1440, + "height": 540 + }, + { + "url": 
"https://pbs.twimg.com/news_img/1589800450299113472/VSkuFPdr?format=jpg&name=150x150", + "width": 150, + "height": 150 + } + ], + "status": 200, + "title": "Find Us | Tesla", + "description": "Use the interactive Find Us map to locate Tesla charging stations, service centers, galleries and stores on the go.", + "unwound_url": "https://www.tesla.com/findus" + }, + { + "start": 56, + "end": 79, + "url": "https://t.co/AL9eudo7nH", + "expanded_url": "https://twitter.com/BLKMDL3/status/1589226433875890176", + "display_url": "twitter.com/BLKMDL3/status…" + } + ] + }, + "id": "1589602301802844161", + "edit_history_tweet_ids": [ + "1589602301802844161" + ], + "possibly_sensitive": false, + "public_metrics": { + "retweet_count": 1530, + "reply_count": 1068, + "like_count": 17873, + "quote_count": 105 + }, + "context_annotations": [ + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557696420500541440", + "name": "Automotive, Aircraft & Boat Business", + "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to automobile, spacecraft, aircraft and boat manufacturing" + } + }, + { + "domain": { + "id": "46", + "name": "Business Taxonomy", + "description": "Categories within Brand Verticals that narrow down the scope of Brands" + }, + "entity": { + "id": "1557697439489880065", + "name": "Travel & Transportation Business", + "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to travel as airlines, travel agencies, hotels" + } + }, + { + "domain": { + "id": "47", + "name": "Brand", + "description": "Brands and Companies" + }, + "entity": { + "id": "10044199219", + "name": "Tesla Motors" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. 
" + }, + "entity": { + "id": "10044199219", + "name": "Tesla Motors" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "864154902926196737", + "name": "S&P 500", + "description": "S&P 500" + } + }, + { + "domain": { + "id": "30", + "name": "Entities [Entity Service]", + "description": "Entity Service top level domain, every item that is in Entity Service should be in this domain" + }, + "entity": { + "id": "781972125179518977", + "name": "Auto Manufacturer - Auto" + } + }, + { + "domain": { + "id": "47", + "name": "Brand", + "description": "Brands and Companies" + }, + "entity": { + "id": "10044199219", + "name": "Tesla Motors" + } + }, + { + "domain": { + "id": "30", + "name": "Entities [Entity Service]", + "description": "Entity Service top level domain, every item that is in Entity Service should be in this domain" + }, + "entity": { + "id": "839159814991167489", + "name": "Travel", + "description": "Travel" + } + }, + { + "domain": { + "id": "65", + "name": "Interests and Hobbies Vertical", + "description": "Top level interests and hobbies groupings, like Food or Travel" + }, + "entity": { + "id": "847528391163092993", + "name": "Automotive", + "description": "Car culture" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "847528576551337984", + "name": "Hybrid and electric vehicles", + "description": "Hybrid and electric vehicles" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "847528646185070592", + "name": "Luxury Cars", + "description": "Luxury" + } + }, + { + "domain": { + "id": "66", + "name": "Interests and Hobbies Category", + 
"description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations" + }, + "entity": { + "id": "864938653063159808", + "name": "Hospitality", + "description": "Hospitality" + } + }, + { + "domain": { + "id": "67", + "name": "Interests and Hobbies", + "description": "Interests, opinions, and behaviors of individuals, groups, or cultures; like Speciality Cooking or Theme Parks" + }, + "entity": { + "id": "864939492947275776", + "name": "Hotel/Motel", + "description": "Hotel/Motel" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "10044199219", + "name": "Tesla Motors" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "847528391163092993", + "name": "Automotive", + "description": "Car culture" + } + }, + { + "domain": { + "id": "131", + "name": "Unified Twitter Taxonomy", + "description": "A taxonomy of user interests. " + }, + "entity": { + "id": "1196845866138533888", + "name": "Automobile Brands" + } + } + ], + "edit_controls": { + "edits_remaining": 5, + "is_edit_eligible": true, + "editable_until": "2022-11-07T13:24:56.000Z" + }, + "reply_settings": "everyone" +}