Skip to content

Commit

Permalink
Merge pull request #42 from uche-madu/dev
Browse files (browse the repository at this point in the history)
fix pyspark logic
  • Loading branch information
uche-madu authored Oct 19, 2023
2 parents 1e487ae + 915281b commit eac0d9f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions pyspark-scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
GCS_BUCKET = "deb-capstone"

# Data directories
-MOVIE_FILES = os.path.join(GCS_BUCKET, "project-data", "movie_reviews")
-LOG_FILES = os.path.join(GCS_BUCKET, "project-data", "log_reviews")
+MOVIE_FILES = os.path.join("project-data", "movie_reviews")
+LOG_FILES = os.path.join("project-data", "log_reviews")

# Metadata directories
-METADATA_DIR = os.path.join(GCS_BUCKET, "project-data", "metadata")
+METADATA_DIR = os.path.join("project-data", "metadata")
MOVIES_METADATA_FILE_PATH = os.path.join(METADATA_DIR, "movie_reviews_metadata.txt")
LOG_METADATA_FILE_PATH = os.path.join(METADATA_DIR, "log_reviews_metadata.txt")

# Model directory
-MODEL_DIR = os.path.join(GCS_BUCKET, "models", "sentiment_spark_nlp")
+MODEL_DIR = os.path.join("models", "sentiment_spark_nlp")

# My Hugging Face sentiment model, fine-tuned on the IMDb movie reviews dataset.
MODEL_NAME = "dreemer6/bert-finetuned-sst2"
Expand Down
2 changes: 1 addition & 1 deletion pyspark-scripts/gcs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def load_files_from_gcs(
# Load data from GCS
df = (spark.read.format(file_format)
.options(**(read_options or {}))
-.load(f"gs://{file}"))
+.load(f"gs://{bucket_name}/{file}"))

dataframes.append(df)

Expand Down

0 comments on commit eac0d9f

Please sign in to comment.