From 419bebcadeb077415aa17c98fc70276a2ca4eef6 Mon Sep 17 00:00:00 2001 From: Wang Xiaojian Date: Tue, 17 Jun 2014 21:33:55 +0800 Subject: [PATCH] Implemented binary search --- simple_ir/Dic.cpp | 31 +++++++++++++++++++++++-------- simple_ir/List.cpp | 2 +- simple_ir/Posting.cpp | 1 + simple_ir/main.cpp | 2 +- simple_ir/test.cpp | 6 ++++++ 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/simple_ir/Dic.cpp b/simple_ir/Dic.cpp index d530a01..791279f 100644 --- a/simple_ir/Dic.cpp +++ b/simple_ir/Dic.cpp @@ -3,26 +3,33 @@ #include #include #include +#include using namespace std; Dic::Dic() { this->lists = vector(); + sorted = false; +} + +bool compareLists(List list1, List list2) +{ + return (list1.getTerm() < list2.getTerm()); } List* Dic::getListByTerm(string term) { - // Should only allow this operation when we have the list sorted by term. - // In that case we can do a binary search. - // Currently we do a linear search. - for(vector::iterator list = lists.begin(); list != lists.end(); list++) + if(!sorted) { - if(list->getTerm() == term) - { - return &(*list); - } + cout << "Lists not sorted yet!" << endl; + return NULL; } + List searchFor(term); + // Binary search + auto results = equal_range(lists.begin(), lists.end(), searchFor, compareLists); + if(results.first != lists.end()) + return &(*results.first); return NULL; } @@ -30,4 +37,12 @@ void Dic::addList(string term) { List list(term); this->lists.push_back(list); + sorted = false; +} + + +void Dic::sortLists() +{ + sort(lists.begin(), lists.end(), compareLists); + sorted = true; } \ No newline at end of file diff --git a/simple_ir/List.cpp b/simple_ir/List.cpp index 7e05640..73e3fb3 100644 --- a/simple_ir/List.cpp +++ b/simple_ir/List.cpp @@ -13,7 +13,7 @@ List::List(string term) this->term = term; } -string List::getTerm() +string List::getTerm() const { return term; } diff --git a/simple_ir/Posting.cpp b/simple_ir/Posting.cpp index 2b568eb..e149568 100644 --- a/simple_ir/Posting.cpp +++ b/simple_ir/Posting.cpp @@ -9,6 +9,7 @@ Posting::Posting(string docId, int fq) this->fq = fq; next=NULL; } + int Posting::freq() { return fq; diff --git a/simple_ir/main.cpp b/simple_ir/main.cpp index 1b14ebc..79966ab 100644 --- a/simple_ir/main.cpp +++ b/simple_ir/main.cpp @@ -9,7 +9,7 @@ using namespace std; int main(int argc, char** argv) { - testList(); + testDic(); //TokenReader::readAndLowerTokensFromFile("../Reuters/10.html"); diff --git a/simple_ir/test.cpp b/simple_ir/test.cpp index 3f51b0f..8be85a1 100644 --- a/simple_ir/test.cpp +++ b/simple_ir/test.cpp @@ -7,15 +7,18 @@ using namespace std; void testDic() { Dic* dic = new Dic(); + dic->sortLists(); List* list = dic->getListByTerm("a"); if(list != NULL) cout << "Error!" << endl; dic->addList("a"); + dic->sortLists(); list = dic->getListByTerm("a"); if(list->getTerm() != "a") cout << "Error!" << endl; dic->addList("b"); dic->addList("c"); + dic->sortLists(); list = dic->getListByTerm("a"); if(list->getTerm() != "a") cout << "Error!" << endl; @@ -35,6 +38,7 @@ void testDic() void testList() { List *list = new List("a"); + if(list->getTerm() != "a") cout << "Error!" << endl; if(list->getLength() != 0) @@ -54,5 +58,7 @@ void testList() posting = posting->next; if(posting != NULL) cout << "Error!" << endl; + if(list->getLength() != 3) + cout << "Error!" << endl; cout << "If no error, then success!" << endl; } \ No newline at end of file