# test_inference.py

import unittest
from inference import *
import em as inf_mod
# Backend switch read by InferenceTest.dai_helper: when True the tests go
# through InferenceModule(method='dai').estimate(...); when False they call
# inf_mod.estimate(...) (the `em` module) directly.
old_inference = False

class InferenceTest(unittest.TestCase):
    """Unit tests for ``dbeta`` and the Dai EM estimator.

    NOTE for the Dai EM tests:
    Calculations assume skill and difficulty priors are both 0.5.

    Input conventions used by the EM tests below:
      votes:     {(worker_id, question_id): {'vote': 0 or 1}}
      workers:   {worker_id: {'skill': value or None}}
      questions: {question_id: {'difficulty': value or None}}
    None presumably marks a parameter the estimator must infer -- confirm
    against the estimator implementation.

    The result of ``dai_helper`` is read through its 'posteriors',
    'questions' and 'workers' entries, each indexed by the matching id.
    """

    def test_dbeta(self):
        """Check dbeta(x, 2, 2) at, above and below the mode x = 0.5.

        The expected values match 6 - 12x, i.e. the slope of the
        Beta(2, 2) density 6x(1 - x).
        """
        cases = [
            (0.5, 0.0),   # mode at x = 0.5 (derivative = 0)
            (0.6, -1.2),  # should be decreasing for x > 0.5
            (0.3, 2.4),   # should be increasing for x < 0.5
        ]
        for x, expected in cases:
            self.assertAlmostEqual(expected, dbeta(x, 2, 2),
                                   msg="dbeta(%s, 2, 2)" % x)

    # TODO test components of inference module:
    #   def test_infer(self):
    #       pass
    #
    #   def test_infer_difficulty_buckets(self):
    #       pass

    def test_dai_all_unknown(self):
        """Run EM with every skill and difficulty set to None.

        Exact values are not known for this configuration, so the only
        check is that every posterior is a valid probability.
        """
        # NOTE vote (worker, question): {'vote': 0/1}
        print("Test Dai w/ Difficulties Unknown, Skills Unknown:")
        votes = {
            (1, 1): {'vote': 0},
            (1, 2): {'vote': 1},
            (2, 1): {'vote': 1},
            (2, 2): {'vote': 0},
            (3, 1): {'vote': 1},
            (3, 2): {'vote': 1},
            (4, 1): {'vote': 1},
            (5, 1): {'vote': 0},
        }
        workers = {
            1: {'skill': None},
            2: {'skill': None},
            3: {'skill': None},
            4: {'skill': None},
            5: {'skill': None},
        }
        questions = {
            1: {'difficulty': None},
            2: {'difficulty': None},
        }

        d_res = self.dai_helper(votes, workers, questions)
        print(d_res)

        # Bounds are the only thing that can be verified here.
        for q in questions:
            posterior = d_res['posteriors'][q]
            self.assertGreaterEqual(posterior, 0.0)
            self.assertLessEqual(posterior, 1.0)

        print("End Test Dai w/ Difficulties Unknown, Skills Unknown")

    def test_dai_all_known(self):
        """Run EM with all skills and difficulties supplied.

        Should just do 1 E-step, so the exact posteriors are known.
        """
        print("Test Dai w/ Difficulties Known, Skills Known:")
        votes = {
            (1, 1): {'vote': 0},
            (2, 1): {'vote': 1},
            (2, 2): {'vote': 1},
        }
        # NOTE: skills awkwardly inverted because calculation was done
        # with inverse skills
        workers = {
            1: {'skill': 1/0.6},
            2: {'skill': 1/0.7},
        }
        questions = {
            1: {'difficulty': 0.9},
            2: {'difficulty': 0.1},
        }

        d_res = self.dai_helper(votes, workers, questions)
        print(d_res)

        expected_q1_post = 0.5078721
        expected_q2_post = 0.9301324
        self.assertAlmostEqual(d_res['posteriors'][1], expected_q1_post)
        self.assertAlmostEqual(d_res['posteriors'][2], expected_q2_post)
        print("End Test Dai w/ Difficulties Known, Skills Known")

    def test_dai_skills_known(self):
        """Run EM with skills supplied and difficulties set to None."""
        print("Test Dai w/ Difficulties Unknown, Skills Known:")
        votes = {
            (1, 1): {'vote': 0},
            (2, 1): {'vote': 1},
            (2, 2): {'vote': 1},
        }
        workers = {
            1: {'skill': 1/0.6},
            2: {'skill': 1/0.5},
        }
        questions = {
            1: {'difficulty': None},
            2: {'difficulty': None},
        }

        d_res = self.dai_helper(votes, workers, questions)
        print(d_res)

        # Know: P(q1 answer = 1) < 0.5 because worker 1 voted 0
        # and is more skilled than worker 2 who voted 1
        self.assertLess(d_res['posteriors'][1], 0.5)

        # Worker 2 cast the only vote on question 2
        # using prob_correct = 1/2 * (1+(1-d)^s) with d2 and s2
        # should match the posterior probability generated by EM.
        # EM output occasionally differs by 1e-6 or so, which the default
        # 7-place comparison rejects; compare to 5 places instead.
        estimated_d2 = d_res['questions'][2]
        prob_correct = 0.5 * (1 + (1 - estimated_d2)**workers[2]['skill'])
        self.assertAlmostEqual(d_res['posteriors'][2], prob_correct, places=5)

        # Know q1 difficulty should be > q2 difficulty
        # bc workers disagree
        #BUG XXX not necessarily true?!
        self.assertGreater(d_res['questions'][1], d_res['questions'][2])

        print("End Test Dai w/ Difficulties Unknown, Skills Known")

    def test_dai_difficulties_known(self):
        """Run EM with difficulties supplied and both skills at 1/0.5.

        The expected posteriors were hand calculated; the expected skills
        equal the values passed in (the prior).
        """
        print("Test Dai w/ Difficulties Known, Skills Unknown:")
        votes = {
            (1, 1): {'vote': 0},
            (1, 2): {'vote': 1},
            (2, 1): {'vote': 1},
            (2, 2): {'vote': 1},
        }
        workers = {
            1: {'skill': 1/0.5},
            2: {'skill': 1/0.5},
        }
        questions = {
            1: {'difficulty': 0.9},
            2: {'difficulty': 0.1},
        }

        d_res = self.dai_helper(votes, workers, questions)

        # hand calculated
        expected_q1_post = 0.5
        expected_q2_post = 0.9891009
        expected_w1_skill = 1/0.5  # prior
        expected_w2_skill = 1/0.5

        self.assertAlmostEqual(expected_q1_post, d_res['posteriors'][1])
        self.assertAlmostEqual(expected_q2_post, d_res['posteriors'][2])

        self.assertAlmostEqual(expected_w1_skill, d_res['workers'][1])
        self.assertAlmostEqual(expected_w2_skill, d_res['workers'][2])

        print(d_res)
        print("End Test Dai w/ Difficulties Known, Skills Unknown")

    def dai_helper(self, votes, workers, questions):
        """Run the estimator selected by the module-level ``old_inference``.

        Args:
            votes: {(worker_id, question_id): {'vote': 0 or 1}}.
            workers: {worker_id: {'skill': value or None}}.
            questions: {question_id: {'difficulty': value or None}}.

        Returns:
            Whatever the selected ``estimate`` call returns; the tests index
            it with 'posteriors', 'questions' and 'workers'.
        """
        if old_inference:
            return InferenceModule(method='dai').estimate(
                votes, workers, questions)
        return inf_mod.estimate(votes, workers, questions)

    def test_gradient(self):
        """
        Check the gradient calculations used in EM.

        For many random parameter sets, the analytic gradient assembled
        from inf_mod.CALC_DD and inf_mod.CALC_DS is compared against a
        finite-difference estimate of -inf_mod.func using
        scipy.optimize.check_grad.
        """
        # Imported here so the test does not depend on `from inference
        # import *` happening to re-export these names.
        import numpy as np
        import scipy.optimize

        # choose error tolerance
        ACCEPTABLE_GRADIENT_ERROR = 0.001

        # check the gradient with n sets of random parameters
        num_checks = 1000

        # choose number of questions and workers to use
        # as well as bounds for skill + difficulty
        nq = 5
        ad, bd = (0.01, 0.99)
        nw = 5
        aw, bw = (0.01, 2.0)

        def make_objective(posteriors, B):
            """Return (f, grad) over x = [difficulties..., skills...]."""
            def objective(x):
                D = x[:nq]
                S = x[nq:]
                return -inf_mod.func(posteriors, B, D, S)

            def gradient(x):
                D = x[:nq]
                S = x[nq:]
                dd = inf_mod.CALC_DD(posteriors, B, D, S)
                ds = inf_mod.CALC_DS(posteriors, B, D, S)
                # Same layout as x: difficulty partials, then skill partials.
                return -np.hstack((dd, ds))

            return objective, gradient

        for _ in range(num_checks):
            posteriors = np.random.rand(nq)

            # (nw, nq) matrix with entries drawn from {-1, 0, 1};
            # presumably the worker-by-question vote matrix -- verify
            # against the em module.
            B = np.random.choice([-1, 0, 1], size=(nw, nq))

            test_diffs = ad + (bd - ad) * np.random.random(nq)
            test_skills = aw + (bw - aw) * np.random.random(nw)
            test_params = np.concatenate([test_diffs, test_skills])

            f, g = make_objective(posteriors, B)
            err = scipy.optimize.check_grad(f, g, test_params)
            #print("Gradient error of %.8f with params=%s" % (err, test_params))
            self.assertLess(err, ACCEPTABLE_GRADIENT_ERROR)

    def test_big_em(self):
        """Smoke/timing run of EM on a full 10-worker x 100-question grid.

        Votes are simulated from random skills, difficulties and true
        answers; the result and elapsed time are printed, nothing is
        asserted.
        """
        import itertools
        import time

        # Imported here so the test does not depend on `from inference
        # import *` happening to re-export numpy.
        import numpy as np

        print("Start Test EM with lots of votes")
        nw = 10
        nq = 100
        workers = {i: {'skill': None} for i in range(nw)}
        questions = {i: {'difficulty': None} for i in range(nq)}
        skills = [np.random.random() for i in range(nw)]
        diffs = [np.random.random() for i in range(nq)]
        truth = [np.random.choice([0, 1]) for i in range(nq)]

        def random_vote(d, s, v):
            """Return v with probability 0.5 * (1 + (1 - d)**s), else 1 - v."""
            accuracy = 0.5 * (1 + (1 - d)**s)
            if np.random.random() < accuracy:
                # vote for correct answer
                return v
            # vote for wrong answer
            return 1 - v

        # Every worker votes on every question.
        votes = {
            (w, q): {'vote': random_vote(diffs[q], skills[w], truth[q])}
            for (w, q) in itertools.product(workers, questions)
        }

        start = time.time()
        d_res = self.dai_helper(votes, workers, questions)
        end = time.time()
        # print() call form works on both Python 2 and 3; the bare
        # `print d_res` statement is a SyntaxError on Python 3.
        print(d_res)
        print("End Test EM with %d votes, took time=%s" % (nq*nw, end - start))


#   def test_mdp(self):
#       # test mdp
#       m = InferenceModule(method = 'mdp')
#       m_res = m.estimate(self.votes, self.workers, self.questions)
#       print m_res
        
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()