Skip to content

Commit

Permalink
Add Windows support and a requirements.txt
Browse files (browse the repository at this point in the history)
  • Loading branch information
ranok authored Oct 27, 2023
1 parent 1a40de4 commit 9954e24
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Needed for ensembling
numpy
# Needed for brotli compression
brotli
16 changes: 8 additions & 8 deletions zippy.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def clean_text(s : str) -> str:

# The prelude file is a text file containing only AI-generated text; it is used to 'seed' the LZMA dictionary.
PRELUDE_FILE : str = 'ai-generated.txt'
with open(PRELUDE_FILE, 'r') as fp:
with open(PRELUDE_FILE, 'r', encoding='utf-8') as fp:
PRELUDE_STR = clean_text(fp.read())

class AIDetector(ABC):
Expand All @@ -62,7 +62,7 @@ def __init__(self, prelude_file : Optional[str] = None, prelude_str : Optional[s
self.prelude_ratio = prelude_ratio

if prelude_file != None:
with open(prelude_file) as fp:
with open(prelude_file, encoding='utf-8') as fp:
self.prelude_str = clean_text(fp.read())
self.prelude_ratio = self._compress(self.prelude_str)
return
Expand Down Expand Up @@ -102,7 +102,7 @@ def __init__(self, prelude_file : Optional[str] = None, prelude_str : Optional[s
self.prelude_ratio = prelude_ratio

if prelude_file != None:
with open(prelude_file) as fp:
with open(prelude_file, encoding='utf-8') as fp:
self.prelude_str = clean_text(fp.read())
lines = self.prelude_str.split('\n')
self.prelude_chunks = array_split(lines, ceil(len(self.prelude_str) / 2**abs(self.WBITS)))
Expand Down Expand Up @@ -153,7 +153,7 @@ def __init__(self, prelude_file : Optional[str] = None, prelude_str : Optional[s

if prelude_file != None:
# Read it once to get the default compression ratio for the prelude
with open(prelude_file, 'r') as fp:
with open(prelude_file, 'r', encoding='utf-8') as fp:
self.prelude_str = fp.read()
self.prelude_ratio = self._compress(self.prelude_str)
return
Expand Down Expand Up @@ -212,7 +212,7 @@ def __init__(self, engine : CompressionEngine = CompressionEngine.LZMA, preset :

def run_on_file(self, filename : str) -> Optional[Score]:
'''Given a filename, returns the score for the contents of that file'''
with open(filename, 'r') as fp:
with open(filename, 'r', encoding='utf-8') as fp:
txt = fp.read()
#print('Calculating score for input of length ' + str(len(txt)))
return self.detector.score_text(txt)
Expand All @@ -230,7 +230,7 @@ def run_on_file_chunked(self, filename : str, chunk_size : int = 1500, prelude_r
This function chunks the file into at most chunk_size parts to score separately, then returns an average. This prevents a very large input
being skewed because its compression ratio starts to overwhelm the prelude file.
'''
with open(filename, 'r') as fp:
with open(filename, 'r', encoding='utf-8') as fp:
contents = fp.read()
return self.run_on_text_chunked(contents, chunk_size, prelude_ratio=prelude_ratio)

Expand Down Expand Up @@ -296,7 +296,7 @@ def _combine_scores(self, scores : list[Score]) -> Score:

def run_on_file(self, filename : str) -> Optional[Score]:
'''Given a filename, returns the score for the contents of that file'''
with open(filename, 'r') as fp:
with open(filename, 'r', encoding='utf-8') as fp:
txt = fp.read()
scores = []
for c in self.component_classifiers:
Expand All @@ -315,7 +315,7 @@ def run_on_file_chunked(self, filename : str, chunk_size : int = 1500, prelude_r
This function chunks the file into at most chunk_size parts to score separately, then returns an average. This prevents a very large input
being skewed because its compression ratio starts to overwhelm the prelude file.
'''
with open(filename, 'r') as fp:
with open(filename, 'r', encoding='utf-8') as fp:
contents = fp.read()
return self.run_on_text_chunked(contents, chunk_size)

Expand Down

0 comments on commit 9954e24

Please sign in to comment.