Skip to content

Commit

Permalink
+ add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
HYLcool committed Sep 9, 2024
1 parent 1dd0fb8 commit 08e76c8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 1 deletion.
5 changes: 5 additions & 0 deletions configs/config_all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ process:
cv_classifier: '' # OpenCV classifier path for face detection. By default, we will use 'haarcascade_frontalface_alt.xml'.
blur_type: 'gaussian' # type of blur kernel, including ['mean', 'box', 'gaussian']
radius: 2 # radius of blur kernel
- image_tagging_mapper: # Mapper to generate image tags.
tag_field_name: '__dj__image_tags__' # the field name to store the tags. It's "__dj__image_tags__" by default.
- nlpaug_en_mapper: # simply augment texts in English based on the nlpaug library
sequential: false # whether to combine all augmentation methods into a sequence. If it's True, a sample will be augmented by all opened augmentation methods sequentially. If it's False, each opened augmentation method would generate its augmented samples independently.
aug_num: 1 # number of augmented samples to be generated. If `sequential` is True, there will be total aug_num augmented samples generated. If it's False, there will be (aug_num * #opened_aug_method) augmented samples generated.
Expand Down Expand Up @@ -258,10 +260,12 @@ process:
show_progress: false # whether to show progress from scenedetect
- video_tagging_from_audio_mapper: # Mapper to generate video tags from audio streams extracted from the video.
hf_ast: 'MIT/ast-finetuned-audioset-10-10-0.4593' # Huggingface model name for the audio classification model.
tag_field_name: '__dj__video_audio_tags__' # the field name to store the tags. It's "__dj__video_audio_tags__" by default.
mem_required: '500MB' # This operation (Op) utilizes deep neural network models that consume a significant amount of memory for computation, hence the system's available memory might constrain the maximum number of processes that can be launched
- video_tagging_from_frames_mapper: # Mapper to generate video tags from frames extracted from the video.
frame_sampling_method: 'all_keyframes' # sampling method for extracting frame images from the videos. Should be one of ["all_keyframes", "uniform"]. The former extracts all key frames and the latter extracts a specified number of frames uniformly from the video. Default: "all_keyframes".
frame_num: 3 # the number of frames to be extracted uniformly from the video. Only works when frame_sampling_method is "uniform". If it's 1, only the middle frame will be extracted. If it's 2, only the first and the last frames will be extracted. If it's larger than 2, in addition to the first and the last frames, other frames will be extracted uniformly within the video duration.
tag_field_name: '__dj__video_frame_tags__' # the field name to store the tags. It's "__dj__video_frame_tags__" by default.
- whitespace_normalization_mapper: # normalize different kinds of whitespaces to English whitespace.

# Filter ops
Expand Down Expand Up @@ -473,6 +477,7 @@ process:
contain: any # require the videos containing 'any' or 'all' given tags. When tags equal to [], 'all' keeps all samples, 'any' keeps no sample.
frame_sampling_method: all_keyframes # sampling method for extracting frame images from the videos. Should be one of ["all_keyframes", "uniform"]. The former extracts all key frames and the latter extracts a specified number of frames uniformly from the video. Default: "all_keyframes".
frame_num: 3 # the number of frames to be extracted uniformly from the video. Only works when frame_sampling_method is "uniform". If it's 1, only the middle frame will be extracted. If it's 2, only the first and the last frames will be extracted. If it's larger than 2, in addition to the first and the last frames, other frames will be extracted uniformly within the video duration.
tag_field_name: '__dj__video_frame_tags__' # the field name to store the tags. It's "__dj__video_frame_tags__" by default.
any_or_all: any # keep this sample when any/all videos meet the filter condition
- words_num_filter: # filter text with number of words out of specific range
lang: en # sample in which language
Expand Down
7 changes: 6 additions & 1 deletion data_juicer/ops/filter/video_tagging_from_frames_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self,
contain: str = 'any',
frame_sampling_method: str = 'all_keyframes',
frame_num: PositiveInt = 3,
tag_field_name=Fields.video_frame_tags,
any_or_all: str = 'any',
*args,
**kwargs):
Expand All @@ -59,6 +60,8 @@ def __init__(self,
the first and the last frames will be extracted. If it's larger
than 2, in addition to the first and the last frames, other frames
will be extracted uniformly within the video duration.
:param tag_field_name: the field name to store the tags. It's
"__dj__video_frame_tags__" by default.
:param any_or_all: keep this sample with 'any' or 'all' strategy of
all videos. 'any': keep this sample if any videos meet the
condition. 'all': keep this sample only if all videos meet the
Expand All @@ -80,10 +83,12 @@ def __init__(self,
self.tags = set([tag.lower() for tag in tags])
self.contain_any = (contain == 'any')
self.any = (any_or_all == 'any')
self.tag_field_name = tag_field_name
self.tagging_producer = VideoTaggingFromFramesMapper(
frame_sampling_method=frame_sampling_method,
frame_num=frame_num,
accelerator=self.accelerator,
tag_field_name=self.tag_field_name,
)

def compute_stats(self, sample, rank=None, context=False):
Expand All @@ -93,7 +98,7 @@ def compute_stats(self, sample, rank=None, context=False):
return sample

def process(self, sample, rank=None):
video_tags = sample[Fields.video_frame_tags]
video_tags = sample[self.tag_field_name]
if len(video_tags) <= 0:
return True

Expand Down
2 changes: 2 additions & 0 deletions data_juicer/ops/mapper/video_tagging_from_frames_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def __init__(self,
the first and the last frames will be extracted. If it's larger
than 2, in addition to the first and the last frames, other frames
will be extracted uniformly within the video duration.
:param tag_field_name: the field name to store the tags. It's
"__dj__video_frame_tags__" by default.
:param args: extra args
:param kwargs: extra args
"""
Expand Down

0 comments on commit 08e76c8

Please sign in to comment.