* Fix problems in the unit tests

modelscope · Sep 9, 2024 · 1dd0fb8
1 parent 440f21d
commit 1dd0fb8
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 64 deletions.
diff --git a/data_juicer/ops/mapper/video_tagging_from_audio_mapper.py b/data_juicer/ops/mapper/video_tagging_from_audio_mapper.py
@@ -1,4 +1,5 @@
 import librosa
+import numpy as np
 
 from data_juicer.utils.availability_utils import AvailabilityChecking
 from data_juicer.utils.constant import Fields
@@ -58,7 +59,7 @@ def process(self, sample, rank=None):
 
         # there is no video in this sample
         if self.video_key not in sample or not sample[self.video_key]:
-            sample[self.tag_field_name] = []
+            sample[self.tag_field_name] = np.array([], dtype=np.str_)
             return sample
 
         # load video paths
@@ -87,11 +88,11 @@ def process(self, sample, rank=None):
                 sr = self._model_sampling_rate
             inputs = feature_extractor(y,
                                        sampling_rate=sr,
-                                       return_tensors='pt')
+                                       return_tensors='pt').to(model.device)
             with torch.no_grad():
                 logits = model(**inputs).logits
             predicted_tag_id = torch.argmax(logits, dim=-1).item()
             predicted_tag = model.config.id2label[predicted_tag_id]
             video_audio_tags.append(predicted_tag)
-        sample[self.tag_field_name] = video_audio_tags
+        sample[self.tag_field_name] = np.array(video_audio_tags, dtype=np.str_)
         return sample
diff --git a/data_juicer/ops/mapper/video_tagging_from_frames_mapper.py b/data_juicer/ops/mapper/video_tagging_from_frames_mapper.py
@@ -101,7 +101,7 @@ def process(self, sample, rank=None, context=False):
                 frames = extract_video_frames_uniformly(video, self.frame_num)
             else:
                 video_tags.append([])
-                frames = []
+                continue
 
             frame_tensor = torch.stack([
                 self.transform(frame.to_image()) for frame in frames

diff --git a/tests/ops/mapper/test_video_tagging_from_audio_mapper.py b/tests/ops/mapper/test_video_tagging_from_audio_mapper.py
@@ -6,9 +6,8 @@
     VideoTaggingFromAudioMapper
 from data_juicer.utils.constant import Fields
 from data_juicer.utils.mm_utils import SpecialTokens
-from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase, SKIPPED_TESTS
+from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase
 
-@SKIPPED_TESTS.register_module()
 class VideoTaggingFromAudioMapperTest(DataJuicerTestCaseBase):
     data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                              'data')
@@ -33,8 +32,7 @@ def _run_video_tagging_from_audio_mapper(self,
                                              num_proc=1):
         dataset = Dataset.from_list(source_list)
         dataset = dataset.map(op.process, num_proc=num_proc)
-        res_list = dataset.select_columns([tag_field_name
-                                           ])[tag_field_name]
+        res_list = dataset.select_columns([tag_field_name])[tag_field_name]
         self.assertEqual(res_list, target_list)
 
     def test(self):

diff --git a/tests/ops/mapper/test_video_tagging_from_frames_mapper.py b/tests/ops/mapper/test_video_tagging_from_frames_mapper.py
@@ -7,9 +7,8 @@
     VideoTaggingFromFramesMapper
 from data_juicer.utils.constant import Fields
 from data_juicer.utils.mm_utils import SpecialTokens
-from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase, SKIPPED_TESTS
+from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase
 
-@SKIPPED_TESTS.register_module()
 class VideoTaggingFromFramesMapperTest(DataJuicerTestCaseBase):
     data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                              'data')
@@ -55,17 +54,18 @@ def test(self):
             'videos': [self.vid2_path],
             Fields.video_frame_tags: [[
                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                'catch', 'hand', 'blind', 'cotton candy', 'ball', 'person'
+                'catch', 'hand', 'blind', 'cotton candy', 'tennis racket',
+                'ball', 'person'
             ]]
         }, {
             'text':
             f'{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
             'videos': [self.vid3_path],
             Fields.video_frame_tags: [[
-                'woman', 'table', 'girl', 'sit', 'person', 'laptop',
-                'bookshelf', 'conversation', 'round table', 'computer', 'man',
-                'closet', 'stool', 'computer screen', 'laugh', 'cabinet',
-                'hand', 'selfie', 'stand'
+                'woman', 'table', 'sit', 'person', 'laptop', 'bookshelf',
+                'conversation', 'round table', 'closet', 'computer', 'girl',
+                'man', 'stool', 'computer screen', 'laugh', 'cabinet', 'hand',
+                'selfie', 'stand'
             ]]
         }]
         op = VideoTaggingFromFramesMapper()
@@ -90,7 +90,7 @@ def test_specified_tag_field_name(self):
             'text':
             f'{SpecialTokens.video} 白色的小羊站在一旁讲话。旁边还有两只灰色猫咪和一只拉着灰狼的猫咪。',
             'videos': [self.vid1_path],
-            Fields.video_frame_tags: [[
+            tag_field_name: [[
                 'animal', 'ray', 'text', 'writing', 'yellow', 'game',
                 'screenshot', 'cartoon', 'cartoon character', 'person', 'robe',
                 'sky'
@@ -99,19 +99,20 @@ def test_specified_tag_field_name(self):
             'text':
             f'{SpecialTokens.video} 身穿白色上衣的男子，拿着一个东西，拍打自己的胃部。{SpecialTokens.eoc}',
             'videos': [self.vid2_path],
-            Fields.video_frame_tags: [[
+            tag_field_name: [[
                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                'catch', 'hand', 'blind', 'cotton candy', 'ball', 'person'
+                'catch', 'hand', 'blind', 'cotton candy', 'tennis racket',
+                'ball', 'person'
             ]]
         }, {
             'text':
             f'{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
             'videos': [self.vid3_path],
             tag_field_name: [[
-                'woman', 'table', 'girl', 'sit', 'person', 'laptop',
-                'bookshelf', 'conversation', 'round table', 'computer', 'man',
-                'closet', 'stool', 'computer screen', 'laugh', 'cabinet',
-                'hand', 'selfie', 'stand'
+                'woman', 'table', 'sit', 'person', 'laptop', 'bookshelf',
+                'conversation', 'round table', 'closet', 'computer', 'girl',
+                'man', 'stool', 'computer screen', 'laugh', 'cabinet', 'hand',
+                'selfie', 'stand'
             ]]
         }]
         op = VideoTaggingFromFramesMapper(tag_field_name=tag_field_name)
@@ -135,28 +136,26 @@ def test_uniform(self):
             f'{SpecialTokens.video} 白色的小羊站在一旁讲话。旁边还有两只灰色猫咪和一只拉着灰狼的猫咪。',
             'videos': [self.vid1_path],
             Fields.video_frame_tags: [[
-                'animal', 'cartoon', 'anime', 'game', 'screenshot',
-                'video game', 'robe', 'ray', 'text', 'writing', 'yellow',
-                'doll', 'tail', 'cartoon character', 'sky', 'person'
-            ]]
+                'cartoon', 'animal', 'anime', 'game', 'screenshot',
+                'video game', 'cartoon character', 'robe', 'ray', 'text',
+                'writing', 'yellow', 'doll', 'tail', 'sky', 'person']]
         }, {
             'text':
             f'{SpecialTokens.video} 身穿白色上衣的男子，拿着一个东西，拍打自己的胃部。{SpecialTokens.eoc}',
             'videos': [self.vid2_path],
             Fields.video_frame_tags: [[
                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                'hand', 'catch', 'bulletin board', 'blind', 'play', 'Wii',
-                'cotton candy', 'tennis racket', 'game controller', 'remote',
-                'stand', 'video game', 'Wii controller', 'racket',
-                'baseball uniform', 'toy', 'green'
-            ]]
+                'hand', 'catch', 'bulletin board', 'Wii', 'cotton candy',
+                'tennis racket', 'blind', 'game controller', 'remote', 'stand',
+                'video game', 'Wii controller', 'play', 'baseball uniform',
+                'toy', 'green']]
         }, {
             'text':
             f'{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
             'videos': [self.vid3_path],
             Fields.video_frame_tags: [[
                 'table', 'sit', 'woman', 'bookshelf', 'conversation', 'person',
-                'round table', 'computer', 'girl', 'laptop', 'man', 'closet',
+                'round table', 'computer', 'girl', 'man', 'closet', 'laptop',
                 'stand', 'computer screen', 'talk', 'room', 'stool', 'hand',
                 'point'
             ]]
@@ -185,7 +184,7 @@ def test_multi_process(self):
         }]
         tgt_list = [{
             'text':
-            f'{SpecialTokens.video} 白色的小羊站在一旁讲话。旁边还有两只灰色猫咪和一只拉着灰狼的猫咪。',
+                f'{SpecialTokens.video} 白色的小羊站在一旁讲话。旁边还有两只灰色猫咪和一只拉着灰狼的猫咪。',
             'videos': [self.vid1_path],
             Fields.video_frame_tags: [[
                 'animal', 'ray', 'text', 'writing', 'yellow', 'game',
@@ -194,21 +193,22 @@ def test_multi_process(self):
             ]]
         }, {
             'text':
-            f'{SpecialTokens.video} 身穿白色上衣的男子，拿着一个东西，拍打自己的胃部。{SpecialTokens.eoc}',
+                f'{SpecialTokens.video} 身穿白色上衣的男子，拿着一个东西，拍打自己的胃部。{SpecialTokens.eoc}',
             'videos': [self.vid2_path],
             Fields.video_frame_tags: [[
                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                'catch', 'hand', 'blind', 'cotton candy', 'ball', 'person'
+                'catch', 'hand', 'blind', 'cotton candy', 'tennis racket',
+                'ball', 'person'
             ]]
         }, {
             'text':
-            f'{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
+                f'{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
             'videos': [self.vid3_path],
             Fields.video_frame_tags: [[
-                'woman', 'table', 'girl', 'sit', 'person', 'laptop',
-                'bookshelf', 'conversation', 'round table', 'computer', 'man',
-                'closet', 'stool', 'computer screen', 'laugh', 'cabinet',
-                'hand', 'selfie', 'stand'
+                'woman', 'table', 'sit', 'person', 'laptop', 'bookshelf',
+                'conversation', 'round table', 'closet', 'computer', 'girl',
+                'man', 'stool', 'computer screen', 'laugh', 'cabinet', 'hand',
+                'selfie', 'stand'
             ]]
         }]
         op = VideoTaggingFromFramesMapper()
@@ -243,27 +243,25 @@ def test_multi_chunk(self):
                 'animal', 'ray', 'text', 'writing', 'yellow', 'game',
                 'screenshot', 'cartoon', 'cartoon character', 'person', 'robe',
                 'sky'
-            ],
-             [
-                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                 'catch', 'hand', 'blind', 'cotton candy', 'ball', 'person'
-             ]]
+            ], [
+                'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
+                'catch', 'hand', 'blind', 'cotton candy', 'tennis racket',
+                'ball', 'person'
+            ]]
         }, {
             'text':
             f'{SpecialTokens.video} 身穿白色上衣的男子，拿着一个东西，拍打自己的胃部。{SpecialTokens.eoc}{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
             'videos': [self.vid2_path, self.vid3_path],
             Fields.video_frame_tags: [[
                 'man', 'shirt', 't shirt', 't-shirt', 'wear', 'white', 'boy',
-                'catch', 'hand', 'blind', 'cotton candy', 'ball', 'person'
-            ],
-                                      [
-                                          'woman', 'table', 'girl', 'sit',
-                                          'person', 'laptop', 'bookshelf',
-                                          'conversation', 'round table',
-                                          'computer', 'man', 'closet', 'stool',
-                                          'computer screen', 'laugh',
-                                          'cabinet', 'hand', 'selfie', 'stand'
-                                      ]]
+                'catch', 'hand', 'blind', 'cotton candy', 'tennis racket',
+                'ball', 'person'
+            ], [
+                'woman', 'table', 'sit', 'person', 'laptop', 'bookshelf',
+                'conversation', 'round table', 'closet', 'computer', 'girl',
+                'man', 'stool', 'computer screen', 'laugh', 'cabinet', 'hand',
+                'selfie', 'stand'
+            ]]
         }, {
             'text':
             f'{SpecialTokens.video} 白色的小羊站在一旁讲话。旁边还有两只灰色猫咪和一只拉着灰狼的猫咪。{SpecialTokens.eoc}{SpecialTokens.video} 两个长头发的女子正坐在一张圆桌前讲话互动。 {SpecialTokens.eoc}',
@@ -272,15 +270,12 @@ def test_multi_chunk(self):
                 'animal', 'ray', 'text', 'writing', 'yellow', 'game',
                 'screenshot', 'cartoon', 'cartoon character', 'person', 'robe',
                 'sky'
-            ],
-                                      [
-                                          'woman', 'table', 'girl', 'sit',
-                                          'person', 'laptop', 'bookshelf',
-                                          'conversation', 'round table',
-                                          'computer', 'man', 'closet', 'stool',
-                                          'computer screen', 'laugh',
-                                          'cabinet', 'hand', 'selfie', 'stand'
-                                      ]]
+            ], [
+                'woman', 'table', 'sit', 'person', 'laptop', 'bookshelf',
+                'conversation', 'round table', 'closet', 'computer', 'girl',
+                'man', 'stool', 'computer screen', 'laugh', 'cabinet', 'hand',
+                'selfie', 'stand'
+            ]]
         }]
         op = VideoTaggingFromFramesMapper()
         self._run_video_tagging_from_frames_mapper(op, ds_list, tgt_list)