Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed 5 failing tests due to list(set(... operations in ParseResults.__init__ #9

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
82cf864
Removed list(set(...)) de-duplicate operations in ParseResults.__init__
ianozsvald Sep 7, 2012
71b793a
Applied schwa's span addition
ianozsvald Sep 7, 2012
ff5a0c0
added span tests as a separate class
ianozsvald Sep 7, 2012
a202185
not sure what happened, unittest.main() does the job now
ianozsvald Sep 7, 2012
90fbc84
added test for hash and comma in URL
ianozsvald Sep 9, 2012
b25880a
uncovered two name-shielded tests and renamed, now also using non-htm…
ianozsvald Sep 10, 2012
536ba80
removed off-by-one offset for URL and hashtag matcher if a pre charac…
ianozsvald Sep 10, 2012
a8c77dc
added reference to the original project
ianozsvald Sep 13, 2012
f309568
changed URL
ianozsvald Sep 13, 2012
be4d2e3
first
ianozsvald Sep 13, 2012
489ca04
preparing for V1.0.0 release
ianozsvald Feb 11, 2013
e2c57a5
weird formatting bug
ianozsvald Feb 11, 2013
2ae04ff
weird formatting bug
ianozsvald Feb 11, 2013
c8e40cd
weird formatting bug
ianozsvald Feb 11, 2013
77ff625
weird formatting bug
ianozsvald Feb 11, 2013
4297316
weird formatting bug
ianozsvald Feb 11, 2013
22c73a9
weird formatting bug
ianozsvald Feb 11, 2013
e2e3615
weird formatting bug
ianozsvald Feb 11, 2013
4b8121c
weird formatting bug
ianozsvald Feb 11, 2013
c024c58
weird formatting bug
ianozsvald Feb 11, 2013
9b86dc1
weird formatting bug
ianozsvald Feb 11, 2013
400758b
minor
ianozsvald Feb 11, 2013
bdf7316
version bump after fixing up setup.py to use a subdirectory
ianozsvald Feb 11, 2013
52c6101
Fix t.co urls followed by a comma
lsemel Mar 25, 2013
a9973f9
added some notes for TODO
ianozsvald Mar 26, 2013
19e2368
bump of version nbr for this new working version, added a shortlink f…
ianozsvald Mar 28, 2013
1bab751
added requirements
ianozsvald Mar 28, 2013
79df69f
Merge branch 'master' of github.com:muckrack/twitter-text-python into…
ianozsvald Apr 4, 2013
dd4e932
adding some , parsing
ianozsvald Apr 4, 2013
4b2d7a0
extra note on how to run tests
ianozsvald Apr 4, 2013
f80d89c
used autopep8 to clean up the src
ianozsvald Jun 1, 2013
93f6985
minor
ianozsvald Jun 1, 2013
e00cad8
notes on pypi release and git tagging
ianozsvald Jun 1, 2013
0724099
note on pushing tags
ianozsvald Jun 1, 2013
033a5ab
cleanup
ianozsvald Jun 1, 2013
66c209b
cleanup
ianozsvald Jun 1, 2013
aa6bf1a
cleanup
ianozsvald Jun 1, 2013
756f947
point to Ed for his support
ianozsvald Jul 28, 2014
13f4990
point to Ed for his support
ianozsvald Jul 28, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Removed list(set(...)) de-duplicate operations in ParseResults.__init__
as they destroy the ordering of urls, users etc in the tweet.  The
list(set( operation on replies was dangerous as reply was a string not a
list (so the string was split into a list of set elements of
characters). Removed lots of non-pep8 whitespace
ianozsvald committed Sep 7, 2012
commit 82cf8641060725ccf5e4e00e6cc3b60191409e2c
218 changes: 109 additions & 109 deletions tests.py
Original file line number Diff line number Diff line change
@@ -24,525 +24,525 @@
class TWPTests(unittest.TestCase):
def setUp(self):
    # Build a fresh parser for every test so entity lists collected
    # during one parse cannot leak into the next assertion.
    self.parser = ttp.Parser()


# General Tests ------------------------------------------------------------
# --------------------------------------------------------------------------
def test_all_not_allow_amp_without_question(self):
result = self.parser.parse(u'Check out: http://www.github.com/test&@username')
self.assertEqual(result.html, u'Check out: <a href="http://www.github.com/test">http://www.github.com/test</a>&<a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.urls, [u'http://www.github.com/test'])

def test_all_not_break_url_at(self):
result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])


# URL tests ----------------------------------------------------------------
# --------------------------------------------------------------------------
def test_url_mid(self):
result = self.parser.parse(u'text http://example.com more text')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a> more text')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_unicode(self):
result = self.parser.parse(u'I enjoy Macintosh Brand computers: http://✪df.ws/ejp')
self.assertEqual(result.html, u'I enjoy Macintosh Brand computers: <a href="http://✪df.ws/ejp">http://✪df.ws/ejp</a>')
self.assertEqual(result.urls, [u'http://\u272adf.ws/ejp'])

def test_url_parentheses(self):
result = self.parser.parse(u'text (http://example.com)')
self.assertEqual(result.html, u'text (<a href="http://example.com">http://example.com</a>)')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_underscore(self):
result = self.parser.parse(u'text http://example.com/test/foo_123.jpg')
self.assertEqual(result.html, u'text <a href="http://example.com/test/foo_123.jpg">http://example.com/test/foo...</a>')
self.assertEqual(result.urls, [u'http://example.com/test/foo_123.jpg'])

def test_url_underscore_dot(self):
result = self.parser.parse(u'text http://example.com/test/bla.net_foo_123.jpg')
self.assertEqual(result.html, u'text <a href="http://example.com/test/bla.net_foo_123.jpg">http://example.com/test/bla...</a>')
self.assertEqual(result.urls, [u'http://example.com/test/bla.net_foo_123.jpg'])

def test_url_amp_lang_equals(self):
result = self.parser.parse(u'Check out http://search.twitter.com/search?q=avro&lang=en')
self.assertEqual(result.html, u'Check out <a href="http://search.twitter.com/search?q=avro&amp;lang=en">http://search.twitter.com/s...</a>')
self.assertEqual(result.urls, [u'http://search.twitter.com/search?q=avro&lang=en'])

def test_url_amp_break(self):
result = self.parser.parse(u'Check out http://twitter.com/te?foo&invalid=True')
self.assertEqual(result.html, u'Check out <a href="http://twitter.com/te?foo&amp;invalid=True">http://twitter.com/te?foo...</a>')
self.assertEqual(result.urls, [u'http://twitter.com/te?foo&invalid=True'])

def test_url_dash(self):
result = self.parser.parse(u'Is www.foo-bar.com a valid URL?')
self.assertEqual(result.html, u'Is <a href="http://www.foo-bar.com">www.foo-bar.com</a> a valid URL?')
self.assertEqual(result.urls, [u'www.foo-bar.com'])

def test_url_multiple(self):
result = self.parser.parse(u'http://example.com https://sslexample.com http://sub.example.com')
self.assertEqual(result.html, u'<a href="http://example.com">http://example.com</a> <a href="https://sslexample.com">https://sslexample.com</a> <a href="http://sub.example.com">http://sub.example.com</a>')
self.assertEqual(result.urls, [u'http://example.com', u'https://sslexample.com', u'http://sub.example.com'])

def test_url_raw_domain(self):
result = self.parser.parse(u'See http://example.com example.com')
self.assertEqual(result.html, u'See <a href="http://example.com">http://example.com</a> example.com')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_embed_link(self):
result = self.parser.parse(u'<link rel=\'true\'>http://example.com</link>')
self.assertEqual(result.html, u'<link rel=\'true\'><a href="http://example.com">http://example.com</a></link>')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_trailing(self):
result = self.parser.parse(u'text http://example.com')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_japanese(self):
result = self.parser.parse(u'いまなにしてるhttp://example.comいまなにしてる')
self.assertEqual(result.html, u'いまなにしてる<a href="http://example.com">http://example.com</a>いまなにしてる')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_lots_of_punctuation(self):
result = self.parser.parse(u'text http://xo.com/~matthew+%-,.;x')
self.assertEqual(result.html, u'text <a href="http://xo.com/~matthew+%-,.;x">http://xo.com/~matthew+%-,.;x</a>')
self.assertEqual(result.urls, [u'http://xo.com/~matthew+%-,.;x'])

def test_url_question_numbers(self):
result = self.parser.parse(u'text http://example.com/?77e8fd')
self.assertEqual(result.html, u'text <a href="http://example.com/?77e8fd">http://example.com/?77e8fd</a>')
self.assertEqual(result.urls, [u'http://example.com/?77e8fd'])

def test_url_one_letter_other(self):
result = self.parser.parse(u'text http://u.nu/')
self.assertEqual(result.html, u'text <a href="http://u.nu/">http://u.nu/</a>')
self.assertEqual(result.urls, [u'http://u.nu/'])

result = self.parser.parse(u'text http://u.tv/')
self.assertEqual(result.html, u'text <a href="http://u.tv/">http://u.tv/</a>')
self.assertEqual(result.urls, [u'http://u.tv/'])

def test_url_one_letter_iana(self):
result = self.parser.parse(u'text http://x.com/')
self.assertEqual(result.html, u'text <a href="http://x.com/">http://x.com/</a>')
self.assertEqual(result.urls, [u'http://x.com/'])

result = self.parser.parse(u'text http://Q.com/')
self.assertEqual(result.html, u'text <a href="http://Q.com/">http://Q.com/</a>')
self.assertEqual(result.urls, [u'http://Q.com/'])

result = self.parser.parse(u'text http://z.com/')
self.assertEqual(result.html, u'text <a href="http://z.com/">http://z.com/</a>')
self.assertEqual(result.urls, [u'http://z.com/'])

result = self.parser.parse(u'text http://i.net/')
self.assertEqual(result.html, u'text <a href="http://i.net/">http://i.net/</a>')
self.assertEqual(result.urls, [u'http://i.net/'])

result = self.parser.parse(u'text http://q.net/')
self.assertEqual(result.html, u'text <a href="http://q.net/">http://q.net/</a>')
self.assertEqual(result.urls, [u'http://q.net/'])

result = self.parser.parse(u'text http://X.org/')
self.assertEqual(result.html, u'text <a href="http://X.org/">http://X.org/</a>')
self.assertEqual(result.urls, [u'http://X.org/'])

def test_url_long_hypens(self):
result = self.parser.parse(u'text http://word-and-a-number-8-ftw.domain.tld/')
self.assertEqual(result.html, u'text <a href="http://word-and-a-number-8-ftw.domain.tld/">http://word-and-a-number-8-...</a>')
self.assertEqual(result.urls, [u'http://word-and-a-number-8-ftw.domain.tld/'])


# URL not tests ------------------------------------------------------------
def test_not_url_dotdotdot(self):
result = self.parser.parse(u'Is www...foo a valid URL?')
self.assertEqual(result.html, u'Is www...foo a valid URL?')
self.assertEqual(result.urls, [])

def test_not_url_dash(self):
result = self.parser.parse(u'Is www.-foo.com a valid URL?')
self.assertEqual(result.html, u'Is www.-foo.com a valid URL?')
self.assertEqual(result.urls, [])

def test_not_url_no_tld(self):
result = self.parser.parse(u'Is http://no-tld a valid URL?')
self.assertEqual(result.html, u'Is http://no-tld a valid URL?')
self.assertEqual(result.urls, [])

def test_not_url_tld_too_short(self):
result = self.parser.parse(u'Is http://tld-too-short.x a valid URL?')
self.assertEqual(result.html, u'Is http://tld-too-short.x a valid URL?')
self.assertEqual(result.urls, [])

def test_all_not_break_url_at(self):
    # NOTE(review): duplicate definition -- an identical test with this
    # name is defined earlier in the class, so Python keeps only this
    # later one; rename one of the two so both actually run.
    result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
    self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
    self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])

def test_not_url_one_letter_iana(self):
result = self.parser.parse(u'text http://a.com/ http://a.net/ http://a.org/')
self.assertEqual(result.html, u'text http://a.com/ http://a.net/ http://a.org/')
self.assertEqual(result.urls, [])


# URL followed Tests -------------------------------------------------------
def test_url_followed_question(self):
result = self.parser.parse(u'text http://example.com?')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>?')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_colon(self):
result = self.parser.parse(u'text http://example.com:')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>:')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_curly_brace(self):
result = self.parser.parse(u'text http://example.com}')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>}')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_single_quote(self):
    # NOTE(review): the input contains no single quote, so this test does
    # not exercise what its name promises -- presumably the intent was
    # u"text http://example.com'" with the quote left outside the link;
    # confirm the parser's behaviour and fix the fixture.
    result = self.parser.parse(u'text http://example.com')
    self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>')
    self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_dot(self):
result = self.parser.parse(u'text http://example.com.')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>.')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_exclamation(self):
result = self.parser.parse(u'text http://example.com!')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>!')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_comma(self):
result = self.parser.parse(u'text http://example.com,')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>,')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_brace(self):
result = self.parser.parse(u'text http://example.com)')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>)')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_big_brace(self):
result = self.parser.parse(u'text http://example.com]')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>]')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_equals(self):
result = self.parser.parse(u'text http://example.com=')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>=')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_semicolon(self):
result = self.parser.parse(u'text http://example.com;')
self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>;')
self.assertEqual(result.urls, [u'http://example.com'])

def test_url_followed_hypen(self):
result = self.parser.parse(u'text http://domain.tld-that-you-should-have-put-a-space-after')
self.assertEqual(result.html, u'text <a href="http://domain.tld">http://domain.tld</a>-that-you-should-have-put-a-space-after')
self.assertEqual(result.urls, [u'http://domain.tld'])


# URL preceeded Tests -------------------------------------------------------
def test_url_preceeded_colon(self):
result = self.parser.parse(u'text:http://example.com')
self.assertEqual(result.html, u'text:<a href="http://example.com">http://example.com</a>')
self.assertEqual(result.urls, [u'http://example.com'])

def test_not_url_preceeded_equals(self):
result = self.parser.parse(u'text =http://example.com')
self.assertEqual(result.html, u'text =http://example.com')
self.assertEqual(result.urls, [])

# NOT
def test_not_url_preceeded_forwardslash(self):
result = self.parser.parse(u'text /http://example.com')
self.assertEqual(result.html, u'text /http://example.com')
self.assertEqual(result.urls, [])

def test_not_url_preceeded_exclamation(self):
result = self.parser.parse(u'text !http://example.com')
self.assertEqual(result.html, u'text !http://example.com')
self.assertEqual(result.urls, [])


# URL numeric tests --------------------------------------------------------
def test_url_at_numeric(self):
result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])

def test_url_at_non_numeric(self):
result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/foobar')
self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/foobar">http://www.flickr.com/photo...</a>')
self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/foobar'])


# URL domain tests ---------------------------------------------------------
def test_url_WWW(self):
result = self.parser.parse(u'WWW.EXAMPLE.COM')
self.assertEqual(result.html, u'<a href="http://WWW.EXAMPLE.COM">WWW.EXAMPLE.COM</a>')
self.assertEqual(result.urls, [u'WWW.EXAMPLE.COM'])

def test_url_www(self):
result = self.parser.parse(u'www.example.com')
self.assertEqual(result.html, u'<a href="http://www.example.com">www.example.com</a>')
self.assertEqual(result.urls, [u'www.example.com'])

def test_url_only_domain_query_followed_period(self):
result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL.')
self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me/why?=because.i.want.it">http://tell.me/why?=because...</a>. Even when they contain a URL.')
self.assertEqual(result.urls, [u'http://tell.me/why?=because.i.want.it'])

def test_url_only_domain_followed_period(self):
result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me. Even when they contain a URL.')
self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me">http://tell.me</a>. Even when they contain a URL.')
self.assertEqual(result.urls, [u'http://tell.me'])

def test_url_only_domain_path_followed_period(self):
result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me/why. Even when they contain a URL.')
self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me/why">http://tell.me/why</a>. Even when they contain a URL.')
self.assertEqual(result.urls, [u'http://tell.me/why'])

def test_url_long_tld(self):
result = self.parser.parse(u'http://example.mobi/path')
self.assertEqual(result.html, u'<a href="http://example.mobi/path">http://example.mobi/path</a>')
self.assertEqual(result.urls, [u'http://example.mobi/path'])

def test_url_multiple_protocols(self):
result = self.parser.parse(u'http://foo.com AND https://bar.com AND www.foobar.com')
self.assertEqual(result.html, u'<a href="http://foo.com">http://foo.com</a> AND <a href="https://bar.com">https://bar.com</a> AND <a href="http://www.foobar.com">www.foobar.com</a>')
self.assertEqual(result.urls, [u'http://foo.com', u'https://bar.com', u'www.foobar.com'])

# NOT
def test_not_url_exclamation_domain(self):
result = self.parser.parse(u'badly formatted http://foo!bar.com')
self.assertEqual(result.html, u'badly formatted http://foo!bar.com')
self.assertEqual(result.urls, [])

def test_not_url_under_domain(self):
result = self.parser.parse(u'badly formatted http://foo_bar.com')
self.assertEqual(result.html, u'badly formatted http://foo_bar.com')
self.assertEqual(result.urls, [])


# Hashtag tests ------------------------------------------------------------
# --------------------------------------------------------------------------
def test_hashtag_followed_full_whitespace(self):
result = self.parser.parse(u'#hashtag text')
self.assertEqual(result.html, u'<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a> text')
self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_followed_full_hash(self):
result = self.parser.parse(u'#hashtag')
self.assertEqual(result.html, u'<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_preceeded_full_whitespace(self):
result = self.parser.parse(u'text #hashtag')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_number(self):
result = self.parser.parse(u'text #1tag')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%231tag">#1tag</a>')
self.assertEqual(result.tags, [u'1tag'])

def test_not_hashtag_escape(self):
result = self.parser.parse(u'&#nbsp;')
self.assertEqual(result.html, u'&#nbsp;')
self.assertEqual(result.tags, [])

def test_hashtag_japanese(self):
result = self.parser.parse(u'text #hashtagの')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>の')
self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_period(self):
result = self.parser.parse(u'text.#hashtag')
self.assertEqual(result.html, u'text.<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_trailing(self):
result = self.parser.parse(u'text #hashtag')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
self.assertEqual(result.tags, [u'hashtag'])

def test_not_hashtag_exclamation(self):
    # NOTE(review): despite the "not_" prefix this asserts the hashtag
    # IS parsed (the trailing "!" merely stays outside the link); the
    # name is misleading and should drop the "not_".
    result = self.parser.parse(u'text #hashtag!')
    self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>!')
    self.assertEqual(result.tags, [u'hashtag'])

def test_hashtag_multiple(self):
result = self.parser.parse(u'text #hashtag1 #hashtag2')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag1">#hashtag1</a> <a href="http://search.twitter.com/search?q=%23hashtag2">#hashtag2</a>')
self.assertEqual(result.tags, [u'hashtag1', u'hashtag2'])

def test_not_hashtag_number(self):
result = self.parser.parse(u'text #1234')
self.assertEqual(result.html, u'text #1234')
self.assertEqual(result.tags, [])

def test_not_hashtag_text(self):
result = self.parser.parse(u'text#hashtag')
self.assertEqual(result.html, u'text#hashtag')
self.assertEqual(result.tags, [])

def test_hashtag_umlaut(self):
result = self.parser.parse(u'text #hash_tagüäö')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash_tag%C3%BC%C3%A4%C3%B6">#hash_tagüäö</a>')
self.assertEqual(result.tags, [u'hash_tag\xfc\xe4\xf6'])

def test_hashtag_alpha(self):
result = self.parser.parse(u'text #hash0tag')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash0tag">#hash0tag</a>')
self.assertEqual(result.tags, [u'hash0tag'])

def test_hashtag_under(self):
result = self.parser.parse(u'text #hash_tag')
self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash_tag">#hash_tag</a>')
self.assertEqual(result.tags, [u'hash_tag'])


# Username tests -----------------------------------------------------------
# --------------------------------------------------------------------------
def test_not_username_preceded_letter(self):
result = self.parser.parse(u'meet@the beach')
self.assertEqual(result.html, u'meet@the beach')
self.assertEqual(result.users, [])

def test_username_preceded_punctuation(self):
result = self.parser.parse(u'.@username')
self.assertEqual(result.html, u'.<a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])

def test_username_preceded_japanese(self):
result = self.parser.parse(u'あ@username')
self.assertEqual(result.html, u'あ<a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])

def test_username_followed_japanese(self):
result = self.parser.parse(u'@usernameの')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>の')
self.assertEqual(result.users, [u'username'])

def test_username_surrounded_japanese(self):
result = self.parser.parse(u'あ@usernameの')
self.assertEqual(result.html, u'あ<a href="http://twitter.com/username">@username</a>の')
self.assertEqual(result.users, [u'username'])

def test_username_followed_punctuation(self):
result = self.parser.parse(u'@username&^$%^')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>&^$%^')
self.assertEqual(result.users, [u'username'])

def test_not_username_spaced(self):
result = self.parser.parse(u'@ username')
self.assertEqual(result.html, u'@ username')
self.assertEqual(result.users, [])

def test_username_beginning(self):
result = self.parser.parse(u'@username text')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a> text')
self.assertEqual(result.users, [u'username'])

def test_username_to_long(self):
    # "to_long" is a typo for "too_long": usernames are capped at 20
    # characters here, with the 21st character left outside the link.
    result = self.parser.parse(u'@username9012345678901')
    self.assertEqual(result.html, u'<a href="http://twitter.com/username901234567890">@username901234567890</a>1')
    self.assertEqual(result.users, [u'username901234567890'])

def test_username_full_at_sign(self):
result = self.parser.parse(u'@username')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])

def test_username_trailing(self):
result = self.parser.parse(u'text @username')
self.assertEqual(result.html, u'text <a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])

# Replies
def test_username_reply_simple(self):
result = self.parser.parse(u'@username')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, u'username')

def test_username_reply_whitespace(self):
result = self.parser.parse(u' @username')
self.assertEqual(result.html, u' <a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, u'username')

def test_username_reply_full(self):
result = self.parser.parse(u' @username')
self.assertEqual(result.html, u' <a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, u'username')

def test_username_non_reply(self):
result = self.parser.parse(u'test @username')
self.assertEqual(result.html, u'test <a href="http://twitter.com/username">@username</a>')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, None)


# List tests ---------------------------------------------------------------
# --------------------------------------------------------------------------
def test_list_preceeded(self):
result = self.parser.parse(u'text @username/list')
self.assertEqual(result.html, u'text <a href="http://twitter.com/username/list">@username/list</a>')
self.assertEqual(result.lists, [(u'username', u'list')])

def test_list_beginning(self):
result = self.parser.parse(u'@username/list')
self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>')
self.assertEqual(result.lists, [(u'username', u'list')])

def test_list_preceeded_punctuation(self):
result = self.parser.parse(u'.@username/list')
self.assertEqual(result.html, u'.<a href="http://twitter.com/username/list">@username/list</a>')
self.assertEqual(result.lists, [(u'username', u'list')])

def test_list_followed_punctuation(self):
result = self.parser.parse(u'@username/list&^$%^')
self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>&^$%^')
self.assertEqual(result.lists, [(u'username', u'list')])

def test_list_not_slash_space(self):
result = self.parser.parse(u'@username/ list')
self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>/ list')
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.lists, [])

def test_list_beginning(self):
    # NOTE(review): duplicate definition -- an identical test with this
    # name is defined earlier in the class, so Python keeps only this
    # later one; rename one of the two so both actually run.
    result = self.parser.parse(u'@username/list')
    self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>')
    self.assertEqual(result.lists, [(u'username', u'list')])

def test_list_not_empty_username(self):
result = self.parser.parse(u'text @/list')
self.assertEqual(result.html, u'text @/list')
self.assertEqual(result.lists, [])

def test_list_not_preceeded_letter(self):
result = self.parser.parse(u'meet@the/beach')
self.assertEqual(result.html, u'meet@the/beach')
self.assertEqual(result.lists, [])

def test_list_long_truncate(self):
result = self.parser.parse(u'@username/list5678901234567890123456789012345678901234567890123456789012345678901234567890A')
self.assertEqual(result.html, u'<a href="http://twitter.com/username/list5678901234567890123456789012345678901234567890123456789012345678901234567890">@username/list5678901234567890123456789012345678901234567890123456789012345678901234567890</a>A')
self.assertEqual(result.lists, [(u'username', u'list5678901234567890123456789012345678901234567890123456789012345678901234567890')])

def test_list_with_dash(self):
result = self.parser.parse(u'text @username/list-foo')
self.assertEqual(result.html, u'text <a href="http://twitter.com/username/list-foo">@username/list-foo</a>')
103 changes: 51 additions & 52 deletions ttp.py
Original file line number Diff line number Diff line change
@@ -60,202 +60,201 @@
PATH_ENDING_CHARS, QUERY_CHARS, QUERY_ENDING_CHARS),
re.IGNORECASE)


# Registered IANA one letter domains
IANA_ONE_LETTER_DOMAINS = ('x.com', 'x.org', 'z.com', 'q.net', 'q.com', 'i.net')


class ParseResult(object):
    '''A class containing the results of a parsed Tweet.

    Attributes:
    - urls
        A list containing all the valid urls in the Tweet, in the order
        they appear (duplicates are preserved).
    - users
        A list containing all the valid usernames in the Tweet.
    - reply
        A string containing the username this tweet was a reply to, or
        None. This only matches a username at the beginning of the
        Tweet, it may however be preceded by whitespace.
        Note: It's generally better to rely on the Tweet JSON/XML in
        order to find out if it's a reply or not.
    - lists
        A list containing all the valid lists in the Tweet.
        Each list item is a tuple in the format (username, listname).
    - tags
        A list containing all the valid tags in the Tweet.
    - html
        A string containing formatted HTML.
        To change the formatting subclass ttp.Parser and override the
        format_* methods.
    '''

    def __init__(self, urls, users, reply, lists, tags, html):
        # Deliberately NO list(set(...)) de-duplication: it destroyed the
        # order of urls/users/tags, and applying it to ``reply`` (a
        # string, not a list) exploded the name into a set of single
        # characters. The diff artifact that kept both the old and new
        # assignments has been removed -- only the corrected ones remain.
        self.urls = urls if urls else []
        self.users = users if users else []
        self.lists = lists if lists else []
        self.reply = reply if reply else None
        self.tags = tags if tags else []
        self.html = html


class Parser(object):
'''A Tweet Parser'''

def __init__(self, max_url_length=30):
self._max_url_length = max_url_length

def parse(self, text, html=True):
    '''Parse the text and return a ParseResult instance.'''
    # Reset the per-call accumulators that the _parse_* callbacks fill.
    self._urls, self._users = [], []
    self._lists, self._tags = [], []

    match = REPLY_REGEX.match(text)
    reply = None if match is None else match.groups(0)[0]

    rendered = self._html(text) if html else self._text(text)
    return ParseResult(self._urls, self._users, reply,
                       self._lists, self._tags, rendered)

def _text(self, text):
    '''Parse a Tweet without generating HTML.

    The substitution passes are run purely for their side effect of
    filling self._urls / _users / _lists / _tags; no HTML is kept.
    '''
    passes = ((URL_REGEX, self._parse_urls),
              (USERNAME_REGEX, self._parse_users),
              (LIST_REGEX, self._parse_lists),
              (HASHTAG_REGEX, self._parse_tags))
    for regex, callback in passes:
        regex.sub(callback, text)
    return None

def _html(self, text):
    '''Parse a Tweet and generate HTML.

    Each pass rewrites the output of the previous one, in the same
    order as _text: urls, usernames, lists, then hashtags.
    '''
    rendered = text
    passes = ((URL_REGEX, self._parse_urls),
              (USERNAME_REGEX, self._parse_users),
              (LIST_REGEX, self._parse_lists),
              (HASHTAG_REGEX, self._parse_tags))
    for regex, callback in passes:
        rendered = regex.sub(callback, rendered)
    return rendered


# Internal parser stuff ----------------------------------------------------
def _parse_urls(self, match):
    '''Parse one URL match: record it and return its HTML replacement.'''

    mat = match.group(0)

    # Fix a bug in the regex concerning www...com and www.-foo.com domains
    # TODO fix this in the regex instead of working around it here
    # (assumes group(5) of URL_REGEX captures the bare domain -- TODO
    # confirm against the regex defined earlier in the file)
    domain = match.group(5)
    if domain[0] in '.-':
        return mat

    # Only allow IANA one letter domains that are actually registered
    if len(domain) == 5 \
        and domain[-4:].lower() in ('.com', '.org', '.net') \
        and not domain.lower() in IANA_ONE_LETTER_DOMAINS:

        return mat

    # Check for urls without http(s)
    pos = mat.find('http')
    if pos != -1:
        pre, url = mat[:pos], mat[pos:]
        full_url = url

    # Find the www and force http:// so the href is clickable
    else:
        pos = mat.lower().find('www')
        pre, url = mat[:pos], mat[pos:]
        full_url = 'http://%s' % url

    self._urls.append(url)

    # NOTE(review): self._html is the *bound method* _html, so this
    # check is always truthy; it never distinguishes text mode from
    # HTML mode. Harmless because _text() discards re.sub's result,
    # but misleading -- consider a real flag.
    if self._html:
        return '%s%s' % (pre, self.format_url(full_url,
                         self._shorten_url(escape(url))))

def _parse_users(self, match):
    '''Collect a matched @username and return its HTML replacement.'''

    # A list match (group 2 present) is handled by _parse_lists instead.
    if match.group(2) is not None:
        return match.group(0)

    matched = match.group(0)
    self._users.append(matched[1:])

    # NOTE: self._html is a bound method, hence always truthy.
    if self._html:
        return self.format_username(matched[0:1], matched[1:])

def _parse_lists(self, match):
    '''Collect a matched @username/listname and return its replacement.'''

    # Without group 4 (the "/listname" part) this is a bare username,
    # which _parse_users handles instead.
    if match.group(4) is None:
        return match.group(0)

    pre, at_char, user, raw_name = match.groups()
    name = raw_name[1:]  # strip the leading '/'
    self._lists.append((user, name))

    # NOTE: self._html is a bound method, hence always truthy.
    if self._html:
        return '%s%s' % (pre, self.format_list(at_char, user, name))

def _parse_tags(self, match):
    '''Parse one hashtag match: record it and return its HTML replacement.'''

    mat = match.group(0)

    # Fix problems with the regex capturing stuff infront of the #
    # Scan for an ASCII '#' or the fullwidth hash U+FF03; the regex
    # should guarantee one is present -- if neither were, pos would be
    # left at -1 and the slicing below would misbehave (TODO confirm
    # HASHTAG_REGEX guarantees this).
    tag = None
    for i in u'#\uff03':
        pos = mat.rfind(i)
        if pos != -1:
            tag = i
            break

    pre, text = mat[:pos], mat[pos + 1:]
    self._tags.append(text)

    # NOTE(review): self._html is the bound method _html, so this check
    # is always truthy; it never distinguishes text mode from HTML mode.
    if self._html:
        return '%s%s' % (pre, self.format_tag(tag, text))

def _shorten_url(self, text):
'''Shorten a URL and make sure to not cut of html entities.'''

if len(text) > self._max_url_length and self._max_url_length != -1:
text = text[0:self._max_url_length - 3]
amp = text.rfind('&')
close = text.rfind(';')
if amp != -1 and (close == -1 or close < amp):
text = text[0:amp]

return text + '...'

else:
return text


# User defined formatters --------------------------------------------------
def format_tag(self, tag, text):
    '''Return formatted HTML for a hashtag.

    ``tag`` is the matched hash character ('#' or fullwidth U+FF03),
    ``text`` the tag body without it.
    '''
    # NOTE(review): urllib.quote and .encode('utf-8') on the text are
    # Python 2 idioms; on Python 3 this needs urllib.parse.quote.
    return '<a href="http://search.twitter.com/search?q=%s">%s%s</a>' \
        % (urllib.quote('#' + text.encode('utf-8')), tag, text)

def format_username(self, at_char, user):
    '''Return an HTML anchor linking *user*'s Twitter profile; the
    visible text keeps the original at-character prefix.'''
    template = '<a href="http://twitter.com/%s">%s%s</a>'
    return template % (user, at_char, user)

def format_list(self, at_char, user, list_name):
    '''Return an HTML anchor linking the Twitter list
    *user*/*list_name*; the visible text keeps the at-character.'''
    template = '<a href="http://twitter.com/%s/%s">%s%s/%s</a>'
    return template % (user, list_name, at_char, user, list_name)

def format_url(self, url, text):
    '''Return an HTML anchor for *url* whose visible label is *text*
    (typically the shortened form); the href is entity-escaped.'''
    template = '<a href="%s">%s</a>'
    return template % (escape(url), text)