-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from letuananh/dev-0.2.3
coolisf 0.2.3 release candidate
- Loading branch information
Showing
59 changed files
with
244,726 additions
and
1,472 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,39 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
""" | ||
Common functions | ||
""" | ||
|
||
Latest version can be found at https://github.com/letuananh/intsem.fx | ||
References: | ||
ACE: | ||
http://moin.delph-in.net/AceOptions | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2015, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
import os | ||
import gzip | ||
|
@@ -67,7 +40,7 @@ def read_file(file_path, mode='rt'): | |
|
||
|
||
def write_file(content, path=None): | ||
''' Write content to a file, or to console if no path is provided ''' | ||
""" Write content to a file, or to console if no path is provided """ | ||
if isinstance(content, str): | ||
mode = 'wt' | ||
else: | ||
|
@@ -91,7 +64,7 @@ def overlap(cfrom1, cto1, cfrom2, cto2): | |
|
||
|
||
def tags_to_concepts(sent): | ||
''' Take concepts from sentence-level tags and create token-level concepts ''' | ||
""" Take concepts from sentence-level tags and create token-level concepts """ | ||
for tag in sent.tags: | ||
tokens = [tk for tk in sent.tokens if overlap(tag.cfrom, tag.cto, tk.cfrom, tk.cto)] | ||
if tokens: | ||
|
@@ -100,7 +73,7 @@ def tags_to_concepts(sent): | |
|
||
|
||
def get_ep_lemma(ep): | ||
''' Get lemma from a pyDelphin elementary predicate ''' | ||
""" Get lemma from a pyDelphin elementary predicate """ | ||
# if ep.pred == 'named': | ||
if ep.carg: | ||
return ep.carg | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
""" | ||
Configuration helper | ||
""" | ||
|
||
Latest version can be found at https://github.com/letuananh/intsem.fx | ||
References: | ||
ACE: | ||
http://moin.delph-in.net/AceOptions | ||
Python documentation: | ||
https://docs.python.org/ | ||
PEP 257 - Python Docstring Conventions: | ||
https://www.python.org/dev/peps/pep-0257/ | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2018, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
import os | ||
import logging | ||
|
@@ -46,7 +15,6 @@ | |
from coolisf.common import write_file | ||
from coolisf.data import read_config_template | ||
|
||
|
||
# ---------------------------------------------------------------------- | ||
# Configuration | ||
# ---------------------------------------------------------------------- | ||
|
@@ -61,9 +29,9 @@ def getLogger(): | |
|
||
|
||
def _get_config_manager(): | ||
''' Internal function for retrieving application config manager object | ||
""" Internal function for retrieving application config manager object | ||
Don't use this directly, use read_config() method instead | ||
''' | ||
""" | ||
return __app_config | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,11 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' CoolISF Data Access Package | ||
Latest version can be found at https://github.com/letuananh/intsem.fx | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# This source code is a part of the Integrated Semantic Framework | ||
# Copyright (c) 2015, Le Tuan Anh <[email protected]> | ||
# LICENSE: The MIT License (MIT) | ||
# | ||
# Homepage: https://github.com/letuananh/intsem.fx | ||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
from .corpus import CorpusDAOSQLite | ||
from .tsdb import read_tsdb | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
""" | ||
Cache DAO - Caching ACE and ISF parse results | ||
""" | ||
|
||
Latest version can be found at https://github.com/letuananh/intsem.fx | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2017, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
import os | ||
import os.path | ||
|
@@ -52,7 +29,7 @@ | |
|
||
|
||
class AceCache(Schema): | ||
''' Cache ACE output ''' | ||
""" Cache ACE output """ | ||
|
||
def __init__(self, data_source, setup_script=None, setup_file=AC_INIT_SCRIPT): | ||
Schema.__init__(self, data_source, setup_script=setup_script, setup_file=setup_file) | ||
|
@@ -104,7 +81,7 @@ def load(self, text, grm, pc, extra_args, ctx=None): | |
|
||
|
||
class ISFCache(Schema): | ||
''' Cache ISF output ''' | ||
""" Cache ISF output """ | ||
|
||
def __init__(self, data_source, setup_script=None, setup_file=PC_INIT_SCRIPT): | ||
Schema.__init__(self, data_source, setup_script=setup_script, setup_file=setup_file) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,31 +2,11 @@ | |
|
||
''' | ||
Corpus DAO - ISF Corpus management functions | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2017, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
# NOTE: This used to be a part of VisualKopasu, but was relicensed | ||
# & migrated into coolISF | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
""" | ||
Rule DB for Optimus Engine | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2017, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
""" | ||
|
||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
import os | ||
import os.path | ||
|
@@ -165,7 +145,7 @@ def flag_rule(self, lid, rid, flag, ctx=None): | |
|
||
@with_ctx | ||
def find_ruleinfo_by_head(self, head, carg=None, flag=None, ctx=None, restricted=True): | ||
''' Find rules related to a predicate ''' | ||
""" Find rules related to a predicate """ | ||
query = ['head = ?'] | ||
params = [head] | ||
if restricted: | ||
|
@@ -177,14 +157,14 @@ def find_ruleinfo_by_head(self, head, carg=None, flag=None, ctx=None, restricted | |
|
||
@with_ctx | ||
def find_ruleinfo(self, nodes, restricted=True, limit=None, ctx=None): | ||
''' Find applicable rules for given DMRS nodes ''' | ||
template = ''' | ||
""" Find applicable rules for given DMRS nodes """ | ||
template = """ | ||
head IN ({head}) {res} | ||
AND (ID IN (SELECT ruleid FROM rulepred WHERE {incl}) | ||
{incl_carg}) | ||
{excl_carg} | ||
AND ID NOT IN (SELECT ruleid FROM rulepred WHERE {excl}) | ||
''' | ||
""" | ||
params_heads = ['udef_q', 'unknown'] | ||
# ruleinfo's flag == 2 (coolisf.model.RuleInfo.COMPOUND) | ||
# lexunit.flag > 3 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
""" | ||
Raw Text Corpus manager | ||
@author: Le Tuan Anh <[email protected]> | ||
@license: MIT | ||
''' | ||
|
||
# Copyright (c) 2017, Le Tuan Anh <[email protected]> | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in | ||
# all copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
# THE SOFTWARE. | ||
|
||
######################################################################## | ||
""" | ||
|
||
# This code is a part of coolisf library: https://github.com/letuananh/intsem.fx | ||
# :copyright: (c) 2014 Le Tuan Anh <[email protected]> | ||
# :license: MIT, see LICENSE for more details. | ||
|
||
import os | ||
import json | ||
|
Oops, something went wrong.