Skip to content

Commit

Permalink
import autoinstall
Browse files Browse the repository at this point in the history
  • Loading branch information
BeachWang committed Aug 23, 2024
1 parent f8cbc92 commit 705065b
Show file tree
Hide file tree
Showing 52 changed files with 51 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import prepare_sentencepiece_model

from ..base_op import OPERATORS, Deduplicator
from ..base_op import AUTOINSTALL, OPERATORS, Deduplicator
from ..common.helper_func import UnionFind, split_on_whitespace

OP_NAME = 'document_minhash_deduplicator'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from data_juicer.utils.constant import HashKeys
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, Deduplicator
from ..base_op import AUTOINSTALL, OPERATORS, Deduplicator
from ..common.helper_func import split_on_whitespace

OP_NAME = 'document_simhash_deduplicator'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/deduplicator/image_deduplicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ..base_op import OPERATORS, Deduplicator
from ..base_op import AUTOINSTALL, OPERATORS, Deduplicator
from ..op_fusion import LOADED_IMAGES
from .document_deduplicator import DocumentDeduplicator

Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/deduplicator/ray_basic_deduplicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.constant import HashKeys
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import Filter

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/deduplicator/ray_image_deduplicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ..base_op import OPERATORS
from ..base_op import AUTOINSTALL, OPERATORS
from ..op_fusion import LOADED_IMAGES
from .ray_basic_deduplicator import RayBasicDeduplicator

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/alphanumeric_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import get_words_from_document

OP_NAME = 'alphanumeric_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/flagged_words_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from data_juicer.utils.model_utils import get_model, prepare_model

from ...utils.asset_utils import ASSET_DIR, load_words_asset
from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import (SPECIAL_CHARACTERS, get_words_from_document,
words_refinement)
from ..op_fusion import INTER_WORDS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_aesthetics_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ...utils.model_utils import get_model, prepare_model
from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_aesthetics_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_face_ratio_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
load_image)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, UNFORKABLE, Filter
from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_face_ratio_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_nsfw_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from data_juicer.utils.mm_utils import load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_nsfw_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_text_matching_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
load_image, remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_text_matching_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_text_similarity_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
load_image, remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_text_similarity_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_watermark_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from data_juicer.utils.mm_utils import load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_watermark_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/language_id_score_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter

OP_NAME = 'language_id_score_filter'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/perplexity_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import get_words_from_document
from ..op_fusion import INTER_WORDS

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/phrase_grounding_recall_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'phrase_grounding_recall_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/stopwords_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import (SPECIAL_CHARACTERS, get_words_from_document,
words_refinement)
from ..op_fusion import INTER_WORDS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/text_action_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from data_juicer.utils.mm_utils import remove_special_tokens
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter

OP_NAME = 'text_action_filter'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/text_entity_dependency_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from data_juicer.utils.mm_utils import remove_special_tokens
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter

OP_NAME = 'text_entity_dependency_filter'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/token_num_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import get_words_from_document

OP_NAME = 'token_num_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_aesthetics_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
load_data_with_context, load_video)

from ...utils.model_utils import get_model, prepare_model
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import INTER_SAMPLED_FRAMES, LOADED_VIDEOS

OP_NAME = 'video_aesthetics_filter'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import INTER_SAMPLED_FRAMES, LOADED_VIDEOS

OP_NAME = 'video_frames_text_similarity_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_motion_score_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data_juicer.utils.constant import Fields, StatsKeys
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, UNFORKABLE, Filter
from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Filter

OP_NAME = 'video_motion_score_filter'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_nsfw_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
load_data_with_context, load_video)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import INTER_SAMPLED_FRAMES, LOADED_VIDEOS

OP_NAME = 'video_nsfw_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_ocr_area_ratio_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
extract_video_frames_uniformly,
load_data_with_context, load_video)

from ..base_op import OPERATORS, UNFORKABLE, Filter
from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Filter
from ..op_fusion import INTER_SAMPLED_FRAMES, LOADED_VIDEOS

OP_NAME = 'video_ocr_area_ratio_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_tagging_from_frames_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from data_juicer.utils.constant import Fields
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, UNFORKABLE, Filter
from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Filter
from ..mapper.video_tagging_from_frames_mapper import \
VideoTaggingFromFramesMapper
from ..op_fusion import LOADED_VIDEOS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_watermark_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
load_data_with_context, load_video)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..op_fusion import INTER_SAMPLED_FRAMES, LOADED_VIDEOS

OP_NAME = 'video_watermark_filter'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/word_repetition_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import (SPECIAL_CHARACTERS, get_words_from_document,
words_refinement)
from ..op_fusion import INTER_WORDS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/words_num_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Filter
from ..base_op import AUTOINSTALL, OPERATORS, Filter
from ..common import (SPECIAL_CHARACTERS, get_words_from_document,
words_refinement)
from ..op_fusion import INTER_WORDS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/audio_ffmpeg_wrapped_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from data_juicer.utils.file_utils import transfer_filename
from data_juicer.utils.logger_utils import HiddenPrints

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'audio_ffmpeg_wrapped_mapper'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/chinese_convert_mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'chinese_convert_mapper'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/clean_html_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'clean_html_mapper'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/fix_unicode_mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'fix_unicode_mapper'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/image_captioning_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_captioning_mapper'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/image_diffusion_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
load_image, remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_diffusion_mapper'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/image_face_blur_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
load_image)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, UNFORKABLE, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Mapper
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'image_face_blur_mapper'
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/nlpaug_en_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.lazy_loader import LazyLoader

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'nlpaug_en_mapper'

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/nlpcda_zh_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.logger_utils import HiddenPrints

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

OP_NAME = 'nlpcda_zh_mapper'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..common import (SPECIAL_CHARACTERS, get_words_from_document,
merge_on_whitespace_tab_newline,
split_on_newline_tab_whitespace, strip)
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/sentence_split_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..common import get_sentences_from_document

OP_NAME = 'sentence_split_mapper'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from data_juicer.utils.mm_utils import SpecialTokens, extract_audio_from_video
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

NAME = 'video_captioning_from_audio_mapper'
CHECK_PKGS = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..op_fusion import LOADED_VIDEOS

OP_NAME = 'video_captioning_from_frames_mapper'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data_juicer.utils.mm_utils import SpecialTokens, remove_special_tokens
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper

NAME = 'video_captioning_from_summarizer_mapper'
CHECK_PKGS = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, Mapper
from ..base_op import AUTOINSTALL, OPERATORS, Mapper
from ..op_fusion import LOADED_VIDEOS

OP_NAME = 'video_captioning_from_video_mapper'

Expand Down
Loading

0 comments on commit 705065b

Please sign in to comment.