Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: test set submission results obtained through the semeval interface #16

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions classification/per_label_results.csv
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,106 @@ experiment,label,precision,recall,f1
6,"Name calling/Labeling",0.5180722891566265,0.3706896551724138,0.43216080402010054
6,"Loaded Language",0.5520833333333334,0.3925925925925926,0.4588744588744589
6,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
7,"Ad Hominem",0.6986899563318777,0.6530612244897959,0.6751054852320675
7,"Appeal to authority",0.75,0.7142857142857143,0.7317073170731706
7,"Appeal to fear/prejudice",0.26666666666666666,0.14814814814814814,0.19047619047619047
7,"Bandwagon",0.6,0.42857142857142855,0.5
7,"Black-and-white Fallacy/Dictatorship",0.36,0.16981132075471697,0.23076923076923075
7,"Causal Oversimplification",0.1111111111111111,0.047619047619047616,0.06666666666666667
7,"Distraction",0.38461538461538464,0.17857142857142858,0.24390243902439027
7,"Doubt",0.375,0.125,0.1875
7,"Exaggeration/Minimisation",0.375,0.1111111111111111,0.17142857142857143
7,"Flag-waving",0.5,0.3333333333333333,0.4
7,"Glittering generalities (Virtue)",0.44,0.3055555555555556,0.36065573770491804
7,"Loaded Language",0.5520833333333334,0.3925925925925926,0.4588744588744589
7,"Logos",0.7307692307692307,0.6653696498054474,0.6965376782077393
7,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
7,"Name calling/Labeling",0.5180722891566265,0.3706896551724138,0.43216080402010054
7,"Obfuscation, Intentional vagueness, Confusion",0.0,0.0,0.0
7,"Presenting Irrelevant Data (Red Herring)",0.0,0.0,0.0
7,"Reductio ad hitlerum",0.0,0.0,0.0
7,"Repetition",0.5,0.21739130434782608,0.30303030303030304
7,"Slogans",0.48484848484848486,0.32,0.38554216867469876
7,"Smears",0.5508474576271186,0.45774647887323944,0.5
7,"Thought-terminating cliché",0.3888888888888889,0.18421052631578946,0.25
7,"Whataboutism",0.1111111111111111,0.047619047619047616,0.06666666666666667
8,"Appeal to authority",0.75,0.7142857142857143,0.7317073170731706
8,"Appeal to fear/prejudice",0.26666666666666666,0.14814814814814814,0.19047619047619047
8,"Bandwagon",0.6,0.42857142857142855,0.5
8,"Black-and-white Fallacy/Dictatorship",0.36,0.16981132075471697,0.23076923076923075
8,"Causal Oversimplification",0.1111111111111111,0.047619047619047616,0.06666666666666667
8,"Doubt",0.375,0.125,0.1875
8,"Exaggeration/Minimisation",0.375,0.1111111111111111,0.17142857142857143
8,"Flag-waving",0.5,0.3333333333333333,0.4
8,"Glittering generalities (Virtue)",0.44,0.3055555555555556,0.36065573770491804
8,"Loaded Language",0.5520833333333334,0.3925925925925926,0.4588744588744589
8,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
8,"Name calling/Labeling",0.5180722891566265,0.3706896551724138,0.43216080402010054
8,"Obfuscation, Intentional vagueness, Confusion",0.0,0.0,0.0
8,"Presenting Irrelevant Data (Red Herring)",0.0,0.0,0.0
8,"Reductio ad hitlerum",0.0,0.0,0.0
8,"Repetition",0.5,0.21739130434782608,0.30303030303030304
8,"Slogans",0.48484848484848486,0.32,0.38554216867469876
8,"Smears",0.5508474576271186,0.45774647887323944,0.5
8,"Thought-terminating cliché",0.3888888888888889,0.18421052631578946,0.25
8,"Whataboutism",0.1111111111111111,0.047619047619047616,0.06666666666666667
9,"Appeal to authority",0.7288135593220338,0.6825396825396826,0.7049180327868851
9,"Appeal to fear/prejudice",0.23076923076923078,0.1111111111111111,0.15
9,"Bandwagon",0.75,0.42857142857142855,0.5454545454545454
9,"Black-and-white Fallacy/Dictatorship",0.34782608695652173,0.1509433962264151,0.2105263157894737
9,"Causal Oversimplification",0.1111111111111111,0.047619047619047616,0.06666666666666667
9,"Doubt",0.5714285714285714,0.16666666666666666,0.25806451612903225
9,"Exaggeration/Minimisation",0.375,0.1111111111111111,0.17142857142857143
9,"Flag-waving",0.4482758620689655,0.30952380952380953,0.3661971830985915
9,"Glittering generalities (Virtue)",0.5,0.2777777777777778,0.35714285714285715
9,"Loaded Language",0.5408163265306123,0.3925925925925926,0.4549356223175966
9,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
9,"Name calling/Labeling",0.5443037974683544,0.3706896551724138,0.441025641025641
9,"Obfuscation, Intentional vagueness, Confusion",0.0,0.0,0.0
9,"Presenting Irrelevant Data (Red Herring)",0.0,0.0,0.0
9,"Reductio ad hitlerum",0.0,0.0,0.0
9,"Repetition",0.5,0.21739130434782608,0.30303030303030304
9,"Slogans",0.5172413793103449,0.3,0.379746835443038
9,"Smears",0.5652173913043478,0.45774647887323944,0.5058365758754865
9,"Thought-terminating cliché",0.3888888888888889,0.18421052631578946,0.25
9,"Whataboutism",0.14285714285714285,0.047619047619047616,0.07142857142857142
10,"Appeal to authority",0.7068965517241379,0.6507936507936508,0.6776859504132232
10,"Appeal to fear/prejudice",0.16666666666666666,0.037037037037037035,0.06060606060606061
10,"Bandwagon",0.75,0.42857142857142855,0.5454545454545454
10,"Black-and-white Fallacy/Dictatorship",0.3,0.11320754716981132,0.1643835616438356
10,"Causal Oversimplification",0.0,0.0,0.0
10,"Doubt",0.0,0.0,0.0
10,"Exaggeration/Minimisation",0.5,0.1111111111111111,0.1818181818181818
10,"Flag-waving",0.5217391304347826,0.2857142857142857,0.3692307692307692
10,"Glittering generalities (Virtue)",0.5,0.19444444444444445,0.28
10,"Loaded Language",0.5568181818181818,0.362962962962963,0.4394618834080717
10,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
10,"Name calling/Labeling",0.5396825396825397,0.29310344827586204,0.37988826815642457
10,"Obfuscation, Intentional vagueness, Confusion",0.0,0.0,0.0
10,"Presenting Irrelevant Data (Red Herring)",0.0,0.0,0.0
10,"Reductio ad hitlerum",0.0,0.0,0.0
10,"Repetition",0.5,0.13043478260869565,0.20689655172413793
10,"Slogans",0.5263157894736842,0.2,0.28985507246376807
10,"Smears",0.5420560747663551,0.4084507042253521,0.465863453815261
10,"Thought-terminating cliché",0.5,0.10526315789473684,0.17391304347826086
10,"Whataboutism",0.3333333333333333,0.047619047619047616,0.08333333333333333
11,"Appeal to authority",0.7857142857142857,0.6984126984126984,0.7394957983193275
11,"Appeal to fear/prejudice",0.2727272727272727,0.1111111111111111,0.15789473684210525
11,"Bandwagon",0.6666666666666666,0.2857142857142857,0.4
11,"Black-and-white Fallacy/Dictatorship",0.2631578947368421,0.09433962264150944,0.13888888888888887
11,"Causal Oversimplification",0.125,0.047619047619047616,0.06896551724137931
11,"Doubt",0.2857142857142857,0.08333333333333333,0.12903225806451613
11,"Exaggeration/Minimisation",0.6,0.1111111111111111,0.18750000000000003
11,"Flag-waving",0.48,0.2857142857142857,0.3582089552238806
11,"Glittering generalities (Virtue)",0.5,0.2777777777777778,0.35714285714285715
11,"Loaded Language",0.5698924731182796,0.3925925925925926,0.4649122807017544
11,"Misrepresentation of Someone's Position (Straw Man)",0.0,0.0,0.0
11,"Name calling/Labeling",0.5606060606060606,0.31896551724137934,0.4065934065934066
11,"Obfuscation, Intentional vagueness, Confusion",0.0,0.0,0.0
11,"Presenting Irrelevant Data (Red Herring)",0.0,0.0,0.0
11,"Reductio ad hitlerum",0.0,0.0,0.0
11,"Repetition",0.6666666666666666,0.2608695652173913,0.37500000000000006
11,"Slogans",0.5909090909090909,0.26,0.36111111111111116
11,"Smears",0.5686274509803921,0.4084507042253521,0.47540983606557374
11,"Thought-terminating cliché",0.35714285714285715,0.13157894736842105,0.1923076923076923
11,"Whataboutism",0.14285714285714285,0.047619047619047616,0.07142857142857142
5 changes: 5 additions & 0 deletions classification/results.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@
4,jhu-clsp/bernice,cls,,-,semeval_internal,dev_set,LogisticRegression,0.5415415415415415,0.622554660529344,0.479185119574845,"Black-and-white Fallacy/Dictatorship 431 16 44 9 Glittering generalities (Virtue) 450 14 25 11 Repetition 472 5 18 5 Slogans 433 17 34 16 Doubt 471 5 21 3 Name calling/Labeling 344 40 73 43 Exaggeration/Minimisation 468 5 24 3 Ad Hominem 186 69 85 160 Causal Oversimplification 471 8 20 1 Distraction 464 8 23 5 Misrepresentation of Someone's Position (Straw Man) 494 2 4 0 Flag-waving 444 14 28 14 Thought-terminating cliché 451 11 31 7 Logos 180 63 86 171 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Smears 305 53 77 65 Reductio ad hitlerum 494 2 4 0 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Appeal to fear/prejudice 462 11 23 4 Loaded Language 322 43 82 53 Whataboutism 471 8 20 1 Appeal to authority 422 15 18 45 Bandwagon 491 2 4 3 ",1707070176
5,jhu-clsp/bernice,cls,,-,semeval_internal,dev_set,ClassifierChain,0.5312419645152995,0.6333537706928265,0.4574844995571302,"Exaggeration/Minimisation 470 3 24 3 Whataboutism 475 4 20 1 Bandwagon 492 1 4 3 Presenting Irrelevant Data (Red Herring) 495 1 4 0 Smears 307 51 71 71 Distraction 468 4 27 1 Thought-terminating cliché 455 7 31 7 Loaded Language 328 37 81 54 Logos 194 49 106 151 Appeal to fear/prejudice 461 12 20 7 Causal Oversimplification 475 4 20 1 Reductio ad hitlerum 496 0 4 0 Doubt 465 11 18 6 Repetition 475 2 20 3 Name calling/Labeling 348 36 76 40 Glittering generalities (Virtue) 455 9 27 9 Black-and-white Fallacy/Dictatorship 405 42 36 17 Flag-waving 442 16 23 19 Ad Hominem 207 48 114 131 Misrepresentation of Someone's Position (Straw Man) 496 0 4 0 Slogans 420 30 34 16 Appeal to authority 418 19 19 44 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 ",1707077292
6,jhu-clsp/bernice,cls,,-,semeval2024,dev_set,LogisticRegression,0.4996013818761626,0.6245847176079734,0.4162976085031001,"Doubt 471 5 21 3 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Slogans 433 17 34 16 Appeal to fear/prejudice 462 11 23 4 Exaggeration/Minimisation 468 5 24 3 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Causal Oversimplification 471 8 20 1 Black-and-white Fallacy/Dictatorship 431 16 44 9 Reductio ad hitlerum 494 2 4 0 Glittering generalities (Virtue) 450 14 25 11 Repetition 472 5 18 5 Thought-terminating cliché 451 11 31 7 Smears 305 53 77 65 Appeal to authority 422 15 18 45 Flag-waving 444 14 28 14 Bandwagon 491 2 4 3 Whataboutism 471 8 20 1 Name calling/Labeling 344 40 73 43 Loaded Language 322 43 82 53 Misrepresentation of Someone's Position (Straw Man) 494 2 4 0 ",1707080779
7,jhu-clsp/bernice,cls,,-,semeval_internal,dev_set,LogisticRegression,0.5415415415415415,0.622554660529344,0.479185119574845,"Ad Hominem 186 69 85 160 Appeal to authority 422 15 18 45 Appeal to fear/prejudice 462 11 23 4 Bandwagon 491 2 4 3 Black-and-white Fallacy/Dictatorship 431 16 44 9 Causal Oversimplification 471 8 20 1 Distraction 464 8 23 5 Doubt 471 5 21 3 Exaggeration/Minimisation 468 5 24 3 Flag-waving 444 14 28 14 Glittering generalities (Virtue) 450 14 25 11 Loaded Language 322 43 82 53 Logos 180 63 86 171 Misrepresentation of Someone's Position (Straw Man) 494 2 4 0 Name calling/Labeling 344 40 73 43 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Reductio ad hitlerum 494 2 4 0 Repetition 472 5 18 5 Slogans 433 17 34 16 Smears 305 53 77 65 Thought-terminating cliché 451 11 31 7 Whataboutism 471 8 20 1 ",1713202515
8,jhu-clsp/bernice,cls,,-,semeval2024,dev_set,LogisticRegression,0.4996013818761626,0.6245847176079734,0.4162976085031001,"Appeal to authority 422 15 18 45 Appeal to fear/prejudice 462 11 23 4 Bandwagon 491 2 4 3 Black-and-white Fallacy/Dictatorship 431 16 44 9 Causal Oversimplification 471 8 20 1 Doubt 471 5 21 3 Exaggeration/Minimisation 468 5 24 3 Flag-waving 444 14 28 14 Glittering generalities (Virtue) 450 14 25 11 Loaded Language 322 43 82 53 Misrepresentation of Someone's Position (Straw Man) 494 2 4 0 Name calling/Labeling 344 40 73 43 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Reductio ad hitlerum 494 2 4 0 Repetition 472 5 18 5 Slogans 433 17 34 16 Smears 305 53 77 65 Thought-terminating cliché 451 11 31 7 Whataboutism 471 8 20 1 ",1713204359
9,jhu-clsp/bernice,cls,,-,semeval2024,dev_set,LogisticRegression,0.4948731786292499,0.6332872928176796,0.4061116031886625,"Appeal to authority 421 16 20 43 Appeal to fear/prejudice 463 10 24 3 Bandwagon 492 1 4 3 Black-and-white Fallacy/Dictatorship 432 15 45 8 Causal Oversimplification 471 8 20 1 Doubt 473 3 20 4 Exaggeration/Minimisation 468 5 24 3 Flag-waving 442 16 29 13 Glittering generalities (Virtue) 454 10 26 10 Loaded Language 320 45 82 53 Misrepresentation of Someone's Position (Straw Man) 495 1 4 0 Name calling/Labeling 348 36 73 43 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Reductio ad hitlerum 494 2 4 0 Repetition 472 5 18 5 Slogans 436 14 35 15 Smears 308 50 77 65 Thought-terminating cliché 451 11 31 7 Whataboutism 473 6 20 1 ",1713212203
10,jhu-clsp/bernice,cls,,-,semeval2024,dev_set,LogisticRegression,0.4425036390101892,0.6457094307561597,0.3365810451727192,"Appeal to authority 420 17 22 41 Appeal to fear/prejudice 468 5 26 1 Bandwagon 492 1 4 3 Black-and-white Fallacy/Dictatorship 433 14 47 6 Causal Oversimplification 474 5 21 0 Doubt 474 2 24 0 Exaggeration/Minimisation 470 3 24 3 Flag-waving 447 11 30 12 Glittering generalities (Virtue) 457 7 29 7 Loaded Language 326 39 86 49 Misrepresentation of Someone's Position (Straw Man) 495 1 4 0 Name calling/Labeling 355 29 82 34 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Presenting Irrelevant Data (Red Herring) 495 1 4 0 Reductio ad hitlerum 495 1 4 0 Repetition 474 3 20 3 Slogans 441 9 40 10 Smears 309 49 84 58 Thought-terminating cliché 458 4 34 4 Whataboutism 477 2 20 1 ",1713212586
11,jhu-clsp/bernice,cls,,-,semeval2024,dev_set,LogisticRegression,0.4709203839638622,0.6495327102803738,0.36935341009743133,"Appeal to authority 425 12 19 44 Appeal to fear/prejudice 465 8 24 3 Bandwagon 492 1 5 2 Black-and-white Fallacy/Dictatorship 433 14 48 5 Causal Oversimplification 472 7 20 1 Doubt 471 5 22 2 Exaggeration/Minimisation 471 2 24 3 Flag-waving 445 13 30 12 Glittering generalities (Virtue) 454 10 26 10 Loaded Language 325 40 82 53 Misrepresentation of Someone's Position (Straw Man) 494 2 4 0 Name calling/Labeling 355 29 79 37 Obfuscation, Intentional vagueness, Confusion 498 0 2 0 Presenting Irrelevant Data (Red Herring) 496 0 4 0 Reductio ad hitlerum 495 1 4 0 Repetition 474 3 17 6 Slogans 441 9 37 13 Smears 314 44 84 58 Thought-terminating cliché 453 9 33 5 Whataboutism 473 6 20 1 ",1713213167
95 changes: 5 additions & 90 deletions commands.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#!/bin/bash

# ft extraction using semeval base
python -m src.feature_extraction \
--dataset semeval_internal \
--model jhu-clsp/bernice \
--extraction_method cls

# semeval_internal
# semeval
python -m src.classification \
--dataset semeval2024 \
--train_features feature_extraction/semeval2024/bernice/1707079469_jhu-clsp-bernice_train_features.json \
Expand All @@ -17,86 +11,7 @@ python -m src.classification \
# semeval_internal trained with train test dev
python -m src.classification \
--dataset semeval_internal \
--train_features feature_extraction/1706826974_jhu-clsp-bernice_train_features.json \
--test_features feature_extraction/1706826974_jhu-clsp-bernice_test_features.json \
--dev_features feature_extraction/1706826974_jhu-clsp-bernice_dev_features.json \
--classifier LogisticRegression

# ft extraction semeval augmented
python -m src.feature_extraction \
--dataset semeval_augmented \
--model xlm-roberta-base \
--extraction_method cls


# semeval2024
python -m src.classification \
--dataset semeval2024 \
--train_features feature_extraction/1701981179_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1701981179_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1701981179_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# using ft from bernice
python -m src.classification \
--dataset semeval2024 \
--train_features feature_extraction/1706662916_jhu-clsp-bernice_train_features.json \
--test_features feature_extraction/1706662916_jhu-clsp-bernice_test_features.json \
--dev_features feature_extraction/1706662916_jhu-clsp-bernice_dev_features.json \
--classifier LogisticRegression

# using ft from bertweet-base
python -m src.classification \
--dataset semeval2024 \
--train_features feature_extraction/1706660251_vinai-bertweet-base_train_features.json \
--test_features feature_extraction/1706660251_vinai-bertweet-base_test_features.json \
--dev_features feature_extraction/1706660251_vinai-bertweet-base_dev_features.json \
--classifier LogisticRegression

# augmented from ptc reductio
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706560674_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706560674_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706560674_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# augmented from 2301
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706492805_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706492805_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706492805_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# augmented label preserving Reductio ad Hitlerum
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706486057_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706486057_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706486057_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# augmented Reductio ad Hitlerum
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706482884_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706482884_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706482884_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# augmented label preserving Smears
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706474866_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706474866_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706474866_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression

# augmented Smears
python -m src.classification \
--dataset semeval_augmented \
--train_features feature_extraction/1706466400_xlm-roberta-base_train_features.json \
--test_features feature_extraction/1706466400_xlm-roberta-base_test_features.json \
--dev_features feature_extraction/1706466400_xlm-roberta-base_dev_features.json \
--classifier LogisticRegression
--train_features feature_extraction/semeval_internal/bernice/1707069752_jhu-clsp-bernice_train_features.json \
--test_features feature_extraction/semeval_internal/bernice/1707069752_jhu-clsp-bernice_test_features.json \
--dev_features feature_extraction/semeval_internal/bernice/1707069752_jhu-clsp-bernice_dev_features.json \
--classifier LogisticRegression
Loading