Skip to content

Commit

Permalink
Merge pull request #41 from uche-madu/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
uche-madu authored Oct 19, 2023
2 parents 4b4e9e3 + 0fdf348 commit 1e487ae
Showing 1 changed file with 15 additions and 7 deletions.
22 changes: 15 additions & 7 deletions pyspark-scripts/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import os
import posixpath

# Bucket name that every storage path below is rooted at.
GCS_BUCKET = "deb-capstone"

# NOTE: the paths below are joined with posixpath rather than os.path so the
# separator is always "/" regardless of the host OS; os.path.join would emit
# "\" on Windows. Presumably these are used as bucket-relative object paths
# rather than local filesystem paths -- confirm against the callers.

# Data directories
MOVIE_FILES = posixpath.join(GCS_BUCKET, "project-data", "movie_reviews")
LOG_FILES = posixpath.join(GCS_BUCKET, "project-data", "log_reviews")

# Metadata directories
METADATA_DIR = posixpath.join(GCS_BUCKET, "project-data", "metadata")
MOVIES_METADATA_FILE_PATH = posixpath.join(METADATA_DIR, "movie_reviews_metadata.txt")
LOG_METADATA_FILE_PATH = posixpath.join(METADATA_DIR, "log_reviews_metadata.txt")

# Model directory
MODEL_DIR = posixpath.join(GCS_BUCKET, "models", "sentiment_spark_nlp")

# My HuggingFace sentiment model fine-tuned using IMDb movie reviews dataset.
# NOTE(review): the model id mentions "sst2" while this comment says IMDb --
# confirm which dataset the model was actually fine-tuned on.
MODEL_NAME = "dreemer6/bert-finetuned-sst2"
Expand All @@ -12,7 +23,4 @@
# BigQuery dataset name and the names of the tables used for movie reviews
# and review logs.
BQ_DATASET_NAME = "movie_analytics"
BQ_MOVIE_REVIEWS_TABLE = "classified_movie_review"
BQ_LOG_REVIEWS_TABLE = "review_logs"

0 comments on commit 1e487ae

Please sign in to comment.