weat.py

import numpy as np
import random
from itertools import filterfalse
from itertools import combinations
import codecs
import utils
import os
import pickle
import logging
import argparse
import time
from collections import OrderedDict
import math
from sklearn.metrics.pairwise import euclidean_distances


class XWEAT(object):
  """
  Perform WEAT (Word Embedding Association Test) bias tests on a language model.
  Follows from Caliskan et al 2017 (10.1126/science.aal4230).

  Credits: Basic implementation based on https://gist.github.com/SandyRogers/e5c2e938502a75dcae25216e4fae2da5
  """

  def __init__(self):
    self.embd_dict = None
    self.vocab = None
    self.embedding_matrix = None

  def set_embd_dict(self, embd_dict):
    self.embd_dict = embd_dict


  def _build_vocab_dict(self, vocab):
    self.vocab = OrderedDict()
    vocab = set(vocab)
    index = 0
    for term in vocab:
      if term in self.embd_dict:
        self.vocab[term] = index
        index += 1
      else:
        logging.warning("Not in vocab %s", term)


  def convert_by_vocab(self, items):
    """Converts a sequence of [tokens|ids] using the vocab."""
    output = []
    for item in items:
      if item in self.vocab:
        output.append(self.vocab[item])
      else:
        continue
    return output

  def _build_embedding_matrix(self):
    self.embedding_matrix = []
    for term, index in self.vocab.items():
      if term in self.embd_dict:
        self.embedding_matrix.append(self.embd_dict[term])
      else:
        raise AssertionError("This should not happen.")
    self.embd_dict = None


  def mat_normalize(self,mat, norm_order=2, axis=1):
    return mat / np.transpose([np.linalg.norm(mat, norm_order, axis)])


  def cosine(self, a, b):
    norm_a = self.mat_normalize(a)
    norm_b = self.mat_normalize(b)
    cos = np.dot(norm_a, np.transpose(norm_b))
    return cos


  def euclidean(self, a, b):
    norm_a = self.mat_normalize(a)
    norm_b = self.mat_normalize(b)
    distances = euclidean_distances(norm_a, norm_b)
    eucl = 1/ (1+distances)
    return eucl


  def csls(self, a, b, k=10):
    norm_a = self.mat_normalize(a)
    norm_b = self.mat_normalize(b)
    sims_local_a = np.dot(norm_a, np.transpose(norm_a))
    sims_local_b = np.dot(norm_b, np.transpose(norm_b))

    csls_norms_a = np.mean(np.sort(sims_local_a, axis=1)[:, -k - 1:-1], axis=1)
    csls_norms_b = np.mean(np.sort(sims_local_b, axis=1)[:, -k - 1:-1], axis=1)
    loc_sims = np.add(np.transpose(np.tile(csls_norms_a, (len(csls_norms_b), 1))),
                      np.tile(csls_norms_b, (len(csls_norms_a), 1)))

    return 2 * np.dot(norm_a, np.transpose(norm_b)) - loc_sims


  def _init_similarities(self, similarity_type):
    if similarity_type == "cosine":
      self.similarities = self.cosine(self.embedding_matrix, self.embedding_matrix)
    elif similarity_type == "csls":
      self.similarities = self.csls(self.embedding_matrix, self.embedding_matrix)
    elif similarity_type == "euclidean":
      self.similarities = self.euclidean(self.embedding_matrix, self.embedding_matrix)
    else:
      raise NotImplementedError()


  def weat_1(self):
    """
    WEAT 1 (universally accepted): Targets 1=Flowers, Targets 2=Insects, Attributes 1=pleasant, Attributes 2=unpleasant
    :return: targets_1, targets_2, attributes_1, attributes_2
    """
    targets_1 = ["aster", "clover", "hyacinth", "marigold", "poppy", "azalea", "crocus", "iris", "orchid", "rose",
               "blue-bell", "daffodil", "lilac", "pansy", "tulip", "buttercup", "daisy", "lily", "peony", "violet",
               "carnation", "gladiola", "magnolia", "petunia", "zinnia"]
    targets_2 = ["ant", "caterpillar", "flea", "locust", "spider", "bedbug", "centipede", "fly", "maggot", "tarantula",
               "bee", "cockroach", "gnat", "mosquito", "termite", "beetle", "cricket", "hornet", "moth", "wasp",
               "blackfly",
               "dragonfly", "horsefly", "roach", "weevil"]
    attributes_1 = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal", "pleasure",
                "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor", "miracle", "sunrise",
                "family",
                "happy", "laughter", "paradise", "vacation"]
    attributes_2 = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison", "stink",
                  "assault", "disaster", "hatred", "pollute", "tragedy", "divorce", "jail", "poverty", "ugly", "cancer",
                  "kill", "rotten",
                  "vomit", "agony", "prison"]
    return targets_1, targets_2, attributes_1, attributes_2


  # WEAT 2: Target 1=Musical instruments, Target 2= Weapons, Attributes 1: pleasant, Attributes 2=unpleasant
  def weat_2(self):
    targets_1 = ["bagpipe", "cello", "guitar", "lute", "trombone", "banjo", "clarinet", "harmonica", "mandolin",
                 "trumpet", "bassoon", "drum", "harp", "oboe", "tuba", "bell", "fiddle", "harpsichord", "piano",
                 "viola", "bongo",
                 "flute", "horn", "saxophone", "violin"]
    targets_2 = ["arrow", "club", "gun", "missile", "spear", "axe", "dagger", "harpoon", "pistol", "sword", "blade",
             "dynamite", "hatchet", "rifle", "tank", "bomb", "firearm", "knife", "shotgun", "teargas", "cannon",
             "grenade",
             "mace", "slingshot", "whip"]
    attributes_1 = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal", "pleasure",
                "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor", "miracle", "sunrise",
                "family", "happy", "laughter", "paradise", "vacation"]
    attributes_2 = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison", "stink",
                  "assault", "disaster", "hatred", "pollute", "tragedy", "divorce", "jail", "poverty", "ugly", "cancer",
                  "kill", "rotten",
                  "vomit", "agony", "prison"]

    return targets_1, targets_2, attributes_1, attributes_2


  # Here they deleted the infrequent african american names, and the same number randomly choosen from the european american names
  def weat_3(self):
    # excluded in the original paper: Chip, Ian, Fred, Jed, Todd, Brandon, Wilbur, Sara, Amber, Crystal, Meredith, Shannon, Donna,
    # Bobbie-Sue, Peggy, Sue-Ellen, Wendy
    targets_1 = ["Adam", "Harry", "Josh", "Roger", "Alan", "Frank", "Justin", "Ryan", "Andrew", "Jack", "Matthew", "Stephen",
                 "Brad", "Greg", "Paul", "Hank", "Jonathan", "Peter", "Amanda", "Courtney", "Heather", "Melanie",
                 "Katie", "Betsy", "Kristin", "Nancy", "Stephanie", "Ellen", "Lauren",  "Colleen", "Emily", "Megan", "Rachel",
                 "Chip", "Ian", "Fred", "Jed", "Todd", "Brandon", "Wilbur", "Sara", "Amber", "Crystal", "Meredith", "Shannon",
                 "Donna", "Bobbie-Sue", "Peggy", "Sue-Ellen", "Wendy"]

    # excluded: Lerone, Percell, Rasaan, Rashaun, Everol, Terryl, Aiesha, Lashelle, Temeka, Tameisha, Teretha, Latonya, Shanise,
    # Sharise, Tashika, Lashandra, Shavonn, Tawanda,
    targets_2 = ["Alonzo", "Jamel",  "Theo", "Alphonse", "Jerome", "Leroy", "Torrance", "Darnell", "Lamar", "Lionel",
                 "Tyree", "Deion", "Lamont", "Malik", "Terrence", "Tyrone",  "Lavon", "Marcellus", "Wardell", "Nichelle",
                 "Shereen", "Ebony", "Latisha", "Shaniqua", "Jasmine", "Tanisha", "Tia", "Lakisha", "Latoya",  "Yolanda",
                 "Malika",  "Yvette", "Lerone", "Percell", "Rasaan", "Rashaun", "Everol", "Terryl", "Aiesha", "Lashelle",
                 "Temeka", "Tameisha", "Teretha", "Latonya", "Shanise", "Sharise", "Tashika", "Lashandra", "Shavonn", "Tawanda"]

    attributes_1 = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal", "pleasure",
                "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor", "miracle", "sunrise",
                "family", "happy", "laughter", "paradise", "vacation"]

    # same as before but agony and prison are replaced with bomb and evil
    attributes_2 = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison", "stink",
                  "assault", "disaster", "hatred", "pollute", "tragedy", "divorce", "jail", "poverty", "ugly", "cancer",
                  "kill", "rotten",
                  "vomit", "bomb", "evil"]

    return targets_1, targets_2, attributes_1, attributes_2


  # again: african american names vs. european american names and pleasant vs unpleasant but with different names
  def weat_4(self):
    # excluded as in the original paper: Jay, Kristen, (here only excluded in the glove experiments)
    targets_1 = ["Brad", "Brendan", "Geoffrey", "Greg", "Brett",  "Matthew", "Neil", "Todd", "Allison", "Anne",
                 "Carrie", "Emily", "Jill", "Laurie",  "Meredith", "Sarah", "Jay", "Kristen"]

    # excluded in GloVe experiments:  Tremayne, Latonya,
    targets_2 = ["Darnell", "Hakim", "Jermaine", "Kareem", "Jamal", "Leroy", "Rasheed", "Tyrone", "Aisha", "Ebony",
                 "Keisha", "Kenya",  "Lakisha", "Latoya", "Tamika", "Tanisha", "Tremayne", "Latonya"]

    attributes_1 = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal", "pleasure",
                    "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor", "miracle", "sunrise",
                    "family", "happy", "laughter", "paradise", "vacation"]

    # same as before but agony and prison are replaced with bomb and evil
    attributes_2 = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison", "stink",
                    "assault", "disaster", "hatred", "pollute", "tragedy", "divorce", "jail", "poverty", "ugly",
                    "cancer",
                    "kill", "rotten",
                    "vomit", "bomb", "evil"]

    return targets_1, targets_2, attributes_1, attributes_2


  # again african american vs european american names, but with different attributes
  def weat_5(self):
    # excluded as in the original paper: Jay, Kristen, (here only excluded in the glove experiments)
    targets_1 = ["Brad", "Brendan", "Geoffrey", "Greg", "Brett",  "Matthew", "Neil", "Todd", "Allison", "Anne",
                 "Carrie", "Emily", "Jill", "Laurie",  "Meredith", "Sarah", "Jay", "Kristen"]

    # excluded in GloVe experiments:  Tremayne, Latonya,
    targets_2 = ["Darnell", "Hakim", "Jermaine", "Kareem", "Jamal", "Leroy", "Rasheed", "Tyrone", "Aisha", "Ebony",
                 "Keisha", "Kenya",  "Lakisha", "Latoya", "Tamika", "Tanisha", "Tremayne", "Latonya"]

    attributes_1 = ["joy", "love", "peace", "wonderful", "pleasure", "friend", "laughter", "happy"]

    attributes_2 = ["agony", "terrible", "horrible", "nasty", "evil", "war", "awful", "failure"]

    return targets_1, targets_2, attributes_1, attributes_2


  # sex- career
  def weat_6(self):
    targets_1 = ["John", "Paul", "Mike", "Kevin", "Steve", "Greg", "Jeff", "Bill"]
    targets_2 = ["Amy", "Joan", "Lisa", "Sarah", "Diana", "Kate", "Ann", "Donna"]
    attributes_1 = ["executive", "management", "professional", "corporation", "salary", "office", "business", "career"]
    attributes_2 = ["home", "parents", "children", "family", "cousins", "marriage", "wedding", "relatives"]

    return targets_1, targets_2, attributes_1, attributes_2


  # sex - math
  def weat_7(self):
    targets_1 = ["math", "algebra", "geometry", "calculus", "equations", "computation", "numbers", "addition"]
    targets_2 = ["poetry", "art", "dance", "literature", "novel", "symphony", "drama", "sculpture"]
    attributes_1 = ["male", "man", "boy", "brother", "he", "him", "his", "son"]
    attributes_2 = ["female", "woman", "girl", "sister", "she", "her", "hers", "daughter"]
    return targets_1, targets_2, attributes_1, attributes_2


  # sex - science
  def weat_8(self):
    targets_1 = ["science", "technology", "physics", "chemistry", "Einstein", "NASA", "experiment", "astronomy"]
    targets_2 = ["poetry", "art", "Shakespeare", "dance", "literature", "novel", "symphony", "drama"]
    attributes_1 = ["brother", "father", "uncle", "grandfather", "son", "he", "his", "him"]
    attributes_2 = ["sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her"]
    return targets_1, targets_2, attributes_1, attributes_2


  # desease - uncontrollability
  def weat_9(self):
    targets_1 = ["sad", "hopeless", "gloomy", "tearful", "miserable", "depressed"]
    targets_2 = ["sick", "illness", "influenza", "disease", "virus", "cancer"]
    # in w2v experiments, the've replaced short-term with short
    attributes_1 = ["impermanent", "unstable", "variable", "fleeting", "short-term", "brief", "occasional"]
    attributes_2 = ["stable", "always", "constant", "persistent", "chronic", "prolonged", "forever"]

    return targets_1, targets_2, attributes_1, attributes_2


  # old - pleasant
  def weat_10(self):
    targets_1 = ["Tiffany", "Michelle", "Cindy", "Kristy", "Brad", "Eric", "Joey", "Billy"]
    targets_2 = ["Ethel", "Bernice", "Gertrude", "Agnes", "Cecil", "Wilbert", "Mortimer", "Edgar"]
    attributes_1 = ["joy", "love", "peace", "wonderful", "pleasure", "friend", "laughter", "happy"]
    attributes_2 = ["agony", "terrible", "horrible", "nasty", "evil", "war", "awful", "failure"]

    return targets_1, targets_2, attributes_1, attributes_2
  # missing from the original IAT: arab-muslim


  # occupations
  def wefat_1(self):
    # occupations derived from th bureau of labor statistics
    targets = ["technician", "accountant", "supervisor", "engineer", "worker", "educator", "clerk", "counselor", "inspector",
              "mechanic", "manager", "therapist", "administrator", "salesperson", "receptionist", "librarian", "advisor", "pharmacist",
              "janitor", "psychologist", "physician", "carpenter", "nurse", "investigator", "bartender", "specialist", "electrician",
              "officer", "pathologist", "teacher", "lawyer", "planner", "practitioner", "plumber", "instructor",
              "surgeon", "veterinarian", "paramedic",
              "examiner", "chemist", "machinist", "appraiser", "nutritionist", "architect", "hairdresser", "baker",
              "programmer", "paralegal", "hygienist", "scientist"]

    attributes_1 = ["male", "man", "boy", "brother", "he", "him", "his", "son"]
    attributes_2 = ["female", "woman", "girl", "sister", "she", "her", "hers", "daughter"]
    return targets, attributes_1, attributes_2


  # androgynous names
  def wefat_2(self):
    targets = ["Kelly", "Tracy", "Jamie", "Jackie", "Jesse", "Courtney", "Lynn", "Taylor", "Leslie", "Shannon",
              "Stacey", "Jessie", "Shawn", "Stacy", "Casey", "Bobby", "Terry", "Lee", "Ashley", "Eddie", "Chris", "Jody", "Pat",
              "Carey", "Willie", "Morgan", "Robbie", "Joan", "Alexis", "Kris", "Frankie", "Bobbie", "Dale", "Robin", "Billie",
              "Adrian", "Kim", "Jaime", "Jean", "Francis", "Marion", "Dana", "Rene", "Johnnie", "Jordan", "Carmen", "Ollie",
              "Dominique", "Jimmie", "Shelby"]

    attributes_1 = ["male", "man", "boy", "brother", "he", "him", "his", "son"]
    attributes_2 = ["female", "woman", "girl", "sister", "she", "her", "hers", "daughter"]
    return targets, attributes_1, attributes_2


  def similarity_precomputed_sims(self, w1, w2, type="cosine"):
    return self.similarities[w1, w2]


  def word_association_with_attribute_precomputed_sims(self, w, A, B):
    return np.mean([self.similarity_precomputed_sims(w, a) for a in A]) - np.mean([self.similarity_precomputed_sims(w, b) for b in B])


  def differential_association_precomputed_sims(self, T1, T2, A1, A2):
    return np.sum([self.word_association_with_attribute_precomputed_sims(t1, A1, A2) for t1 in T1]) \
           - np.sum([self.word_association_with_attribute_precomputed_sims(t2, A1, A2) for t2 in T2])


  def weat_effect_size_precomputed_sims(self, T1, T2, A1, A2):
    return (
             np.mean([self.word_association_with_attribute_precomputed_sims(t1, A1, A2) for t1 in T1]) -
             np.mean([self.word_association_with_attribute_precomputed_sims(t2, A1, A2) for t2 in T2])
           ) / np.std([self.word_association_with_attribute_precomputed_sims(w, A1, A2) for w in T1 + T2])


  def _random_permutation(self, iterable, r=None):
    pool = tuple(iterable)
    r = len(pool) if r is None else r
    return tuple(random.sample(pool, r))

  def weat_p_value_precomputed_sims(self, T1, T2, A1, A2, sample):
    logging.info("Calculating p value ... ")
    size_of_permutation = min(len(T1), len(T2))
    T1_T2 = T1 + T2
    observed_test_stats_over_permutations = []
    total_possible_permutations = math.factorial(len(T1_T2)) / math.factorial(size_of_permutation) / math.factorial((len(T1_T2)-size_of_permutation))
    logging.info("Number of possible permutations: %d", total_possible_permutations)
    if not sample or sample >= total_possible_permutations:
      permutations = combinations(T1_T2, size_of_permutation)
    else:
      logging.info("Computing randomly first %d permutations", sample)
      permutations = set()
      while len(permutations) < sample:
        permutations.add(tuple(sorted(self._random_permutation(T1_T2, size_of_permutation))))

    for Xi in permutations:
      Yi = filterfalse(lambda w: w in Xi, T1_T2)
      observed_test_stats_over_permutations.append(self.differential_association_precomputed_sims(Xi, Yi, A1, A2))
      if len(observed_test_stats_over_permutations) % 100000 == 0:
        logging.info("Iteration %s finished", str(len(observed_test_stats_over_permutations)))
    unperturbed = self.differential_association_precomputed_sims(T1, T2, A1, A2)
    is_over = np.array([o > unperturbed for o in observed_test_stats_over_permutations])
    return is_over.sum() / is_over.size


  def weat_stats_precomputed_sims(self, T1, T2, A1, A2, sample_p=None):
    test_statistic = self.differential_association_precomputed_sims(T1, T2, A1, A2)
    effect_size = self.weat_effect_size_precomputed_sims(T1, T2, A1, A2)
    p = self.weat_p_value_precomputed_sims(T1, T2, A1, A2, sample=sample_p)
    return test_statistic, effect_size, p

  def _create_vocab(self):
    """
    >>> weat = XWEAT(None); weat._create_vocab()
    :return: all
    """
    all = []
    for i in range(1, 10):
      t1, t2, a1, a2 = getattr(self, "weat_" + str(i))()
      all = all + t1 + t2 + a1 + a2
    for i in range(1, 2):
      t1, a1, a2 = getattr(self, "wefat_" + str(i))()
      all = all + t1 + a1 + a2
    all = set(all)
    return all

  def _output_vocab(self, path="./data/vocab_en.txt"):
    """
    >>> weat = XWEAT(None); weat._output_vocab()
    """
    vocab = self._create_vocab()
    with codecs.open(path, "w", "utf8") as f:
      for w in vocab:
        f.write(w)
        f.write("\n")
      f.close()


  def run_test_precomputed_sims(self, target_1, target_2, attributes_1, attributes_2, sample_p=None, similarity_type="cosine"):
    """Run the WEAT test for differential association between two
    sets of target words and two sets of attributes.

    RETURNS:
        (d, e, p). A tuple of floats, where d is the WEAT Test statistic,
        e is the effect size, and p is the one-sided p-value measuring the
        (un)likeliness of the null hypothesis (which is that there is no
        difference in association between the two target word sets and
        the attributes).

        If e is large and p small, then differences in the model between
        the attribute word sets match differences between the targets.
    """
    vocab = target_1 + target_2 + attributes_1 + attributes_2
    self._build_vocab_dict(vocab)
    T1 = self.convert_by_vocab(target_1)
    T2 = self.convert_by_vocab(target_2)
    A1 = self.convert_by_vocab(attributes_1)
    A2 = self.convert_by_vocab(attributes_2)
    while len(T1) < len(T2):
      logging.info("Popped T2 %d", T2[-1])
      T2.pop(-1)
    while len(T2) < len(T1):
      logging.info("Popped T1 %d", T1[-1])
      T1.pop(-1)
    while len(A1) < len(A2):
      logging.info("Popped A2 %d", A2[-1])
      A2.pop(-1)
    while len(A2) < len(A1):
      logging.info("Popped A1 %d", A1[-1])
      A1.pop(-1)
    assert len(T1)==len(T2)
    assert len(A1) == len(A2)
    self._build_embedding_matrix()
    self._init_similarities(similarity_type)
    return self.weat_stats_precomputed_sims(T1, T2, A1, A2, sample_p)

  def _parse_translations(self, path="./data/vocab_en_de.csv", new_path="./data/vocab_dict_en_de.p", is_russian=False):
    """
    :param path: path of the csv file edited by our translators
    :param new_path: path of the clean dict to save
    >>> XWEAT()._parse_translations(is_russian=False)
    293
    """
    # This code probably does not work for the russian code, as dmitry did use other columns for his corrections
    with codecs.open(path, "r", "utf8") as f:
      translation_dict = {}
      for line in f.readlines():
        parts = line.split(",")
        en = parts[0]
        if en == "" or en[0].isupper():
          continue
        else:
          if is_russian and parts[3] != "\n" and parts[3] != "\r\n" and parts[3] != "\r":
              other_m = parts[2]
              other_f = parts[3].strip()
              translation_dict[en] = (other_m, other_f)
          else:
            other_m = parts[1].strip()
            other_f = None
            if len(parts) > 2 and parts[2] != "\n" and parts[2] != "\r\n" and parts[2] != "\r" and parts[2] != '':
              other_f = parts[2].strip()
            translation_dict[en] = (other_m, other_f)
      pickle.dump(translation_dict, open(new_path, "wb"))
      return len(translation_dict)

def load_vocab_goran(path):
  return pickle.load(open(path, "rb"))

def load_vectors_goran(path):
  return np.load(path)

def load_embedding_dict(vocab_path="", vector_path="", embeddings_path="", glove=False, postspec=False):
  """
  >>> _load_embedding_dict()
  :param vocab_path:
  :param vector_path:
  :return: embd_dict
  """
  if glove and postspec:
    raise ValueError("Glove and postspec cannot both be true")
  elif glove:
    if os.name == "nt":
      embd_dict = utils.load_embeddings("C:/Users/anlausch/workspace/embedding_files/glove.6B/glove.6B.300d.txt",
                                        word2vec=False)
    else:
      embd_dict = utils.load_embeddings("/work/anlausch/glove.6B.300d.txt", word2vec=False)
    return embd_dict
  elif postspec:
    embd_dict_temp = utils.load_embeddings("/work/anlausch/ft_postspec.txt", word2vec=False)
    embd_dict = {}
    for key, value in embd_dict_temp.items():
      embd_dict[key.split("en_")[1]] = value
    assert("test" in embd_dict)
    assert ("house" in embd_dict)
    return embd_dict
  elif embeddings_path != "":
    embd_dict = utils.load_embeddings(embeddings_path, word2vec=False)
    return embd_dict
  else:
    embd_dict = {}
    vocab = load_vocab_goran(vocab_path)
    vectors = load_vectors_goran(vector_path)
    for term, index in vocab.items():
      embd_dict[term] = vectors[index]
    assert len(embd_dict) == len(vocab)
    return embd_dict

def translate(translation_dict, terms):
  translation = []
  for t in terms:
    if t in translation_dict or t.lower() in translation_dict:
      if t.lower() in translation_dict:
        male, female = translation_dict[t.lower()]
      elif t in translation_dict:
        male, female = translation_dict[t]
      if female is None or female is '':
        translation.append(male)
      else:
        translation.append(male)
        translation.append(female)
    else:
      translation.append(t)
  translation = list(set(translation))
  return translation


def compute_oov_percentage():
  """
  >>> compute_oov_percentage()
  :return:
  """
  with codecs.open("./results/oov_short.txt", "w", "utf8") as f:
    for test in range(1,11):
      f.write("Test %d \n" % test)
      targets_1, targets_2, attributes_1, attributes_2 = XWEAT().__getattribute__("weat_" + str(test))()
      vocab = targets_1 + targets_2 + attributes_1 + attributes_2
      vocab = [t.lower() for t in vocab]
      #f.write("English vocab: %s \n" % str(vocab))
      for language in ["en", "es", "de", "tr", "ru", "hr", "it"]:
        if language != "en":
          #f.write("Translating terms from en to %s\n" % language)
          translation_dict = load_vocab_goran("./data/vocab_dict_en_" + language + ".p")
          vocab_translated = translate(translation_dict, vocab)
          vocab_translated = [t.lower() for t in vocab_translated]
          #f.write("Translated terms %s\n" % str(vocab))
        embd_dict = load_embedding_dict(vocab_path="/work/gglavas/data/word_embs/yacle/fasttext/200K/npformat/ft.wiki."+language+".300.vocab", vector_path="/work/gglavas/data/word_embs/yacle/fasttext/200K/npformat/ft.wiki."+language+".300.vectors")
        ins=[]
        not_ins=[]
        if language != "en":
          for term in vocab_translated:
            if term in embd_dict:
              ins.append(term)
            else:
              not_ins.append(term)
        else:
          for term in vocab:
            if term in embd_dict:
              ins.append(term)
            else:
              not_ins.append(term)
        #f.write("OOVs: %s\n" % str(not_ins))
        f.write("OOV Percentage for language %s: %s\n" % (language, (len(not_ins)/len(vocab))))
      f.write("\n")
  f.close()


def main():
  def boolean_string(s):
    if s not in {'False', 'True', 'false', 'true'}:
      raise ValueError('Not a valid boolean string')
    return s == 'True' or s == 'true'
  parser = argparse.ArgumentParser(description="Running XWEAT")
  parser.add_argument("--test_number", type=int, help="Number of the weat test to run", required=False)
  parser.add_argument("--permutation_number", type=int, default=None,
                      help="Number of permutations (otherwise all will be run)", required=False)
  parser.add_argument("--output_file", type=str, default=None, help="File to store the results)", required=False)
  parser.add_argument("--lower", type=boolean_string, default=False, help="Whether to lower the vocab", required=True)
  parser.add_argument("--similarity_type", type=str, default="cosine", help="Which similarity function to use",
                      required=False)
  parser.add_argument("--embedding_vocab", type=str, help="Vocab of the embeddings")
  parser.add_argument("--embedding_vectors", type=str, help="Vectors of the embeddings")
  parser.add_argument("--use_glove", type=boolean_string, default=False, help="Use glove")
  parser.add_argument("--postspec", type=boolean_string, default=False, help="Use postspecialized fasttext")
  parser.add_argument("--is_vec_format", type=boolean_string, default=False, help="Whether embeddings are in vec format")
  parser.add_argument("--embeddings", type=str, help="Vectors and vocab of the embeddings")
  parser.add_argument("--lang", type=str, default="en", help="Language to test")
  args = parser.parse_args()

  start = time.time()
  logging.basicConfig(level=logging.INFO)
  logging.info("XWEAT started")
  weat = XWEAT()
  if args.test_number == 1:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_1()
  elif args.test_number == 2:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_2()
  elif args.test_number == 3:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_3()
  elif args.test_number == 4:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_4()
  elif args.test_number == 5:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_5()
  elif args.test_number == 6:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_6()
  elif args.test_number == 7:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_7()
  elif args.test_number == 8:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_8()
  elif args.test_number == 9:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_9()
  elif args.test_number == 10:
    targets_1, targets_2, attributes_1, attributes_2 = weat.weat_10()
  else:
    raise ValueError("Only WEAT 1 to 10 are supported")

  if args.lang != "en":
    logging.info("Translating terms from en to %s", args.lang)
    translation_dict = load_vocab_goran("./data/vocab_dict_en_" + args.lang + ".p")
    targets_1 = translate(translation_dict, targets_1)
    targets_2 = translate(translation_dict, targets_2)
    attributes_1 = translate(translation_dict, attributes_1)
    attributes_2 = translate(translation_dict, attributes_2)

  if args.lower:
    targets_1 = [t.lower() for t in targets_1]
    targets_2 = [t.lower() for t in targets_2]
    attributes_1 = [a.lower() for a in attributes_1]
    attributes_2 = [a.lower() for a in attributes_2]

  if args.use_glove:
    logging.info("Using glove")
    embd_dict = load_embedding_dict(glove=True)
  elif args.postspec:
    logging.info("Using postspecialized embeddings")
    embd_dict=load_embedding_dict(postspec=True)
  elif args.is_vec_format:
    logging.info("Embeddings are in vec format")
    embd_dict = load_embedding_dict(embeddings_path=args.embeddings, glove=False)
  else:
    embd_dict = load_embedding_dict(vocab_path=args.embedding_vocab, vector_path=args.embedding_vectors, glove=False)
  weat.set_embd_dict(embd_dict)

  logging.info("Embeddings loaded")
  logging.info("Running test")
  result = weat.run_test_precomputed_sims(targets_1, targets_2, attributes_1, attributes_2, args.permutation_number, args.similarity_type)
  logging.info(result)
  with codecs.open(args.output_file, "w", "utf8") as f:
    f.write("Config: ")
    f.write(str(args.test_number) + " and ")
    f.write(str(args.lower) + " and ")
    f.write(str(args.permutation_number) + "\n")
    f.write("Result: ")
    f.write(str(result))
    f.write("\n")
    end = time.time()
    duration_in_hours = ((end - start) / 60) / 60
    f.write(str(duration_in_hours))
    f.close()

if __name__ == "__main__":
  main()