Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove param mutation + more #164

Merged
merged 34 commits into from
Jul 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
8da77db
refactor: BySchoolYearCourseJson
lorenzocorallo Jul 5, 2024
ab1033f
refactor: merge EnrollUtil into EnrollType
lorenzocorallo Jul 5, 2024
ae59f35
refactor: rename method in HashMatricola
lorenzocorallo Jul 5, 2024
6c929ef
refactor: rename method in Parser/Program.cs
lorenzocorallo Jul 5, 2024
f0c76c0
refactor: rename param in UpdateDateFound
lorenzocorallo Jul 5, 2024
e7a33db
chore: remove unused import
lorenzocorallo Jul 5, 2024
c3daf44
refactor: move ranking saving to RankingsSet and Ranking
lorenzocorallo Jul 5, 2024
2f4ac69
chore: remove fake-null param
lorenzocorallo Jul 5, 2024
e226379
refactor: BySchoolYearJson.cs
lorenzocorallo Jul 6, 2024
5ba9c9d
Merge branch 'refs/heads/main' into 105-refactor-param-mutation
lorenzocorallo Jul 6, 2024
ec6ae7d
fix code related to merge
lorenzocorallo Jul 6, 2024
5bfe9b2
refactor: ByYearSchoolJson.cs
lorenzocorallo Jul 6, 2024
7a83b42
chore: remove orphan nullable attribute from RankingsSet params
lorenzocorallo Jul 6, 2024
14a9bc8
refactor: StatsJson.cs + rename StatsSingleJson.cs method
lorenzocorallo Jul 6, 2024
e86ed6d
refactor: StatsSchool.cs
lorenzocorallo Jul 6, 2024
94a694f
refactor: StatsYear.cs
lorenzocorallo Jul 6, 2024
64b7b14
refactor: rename WriteToFile to Write for consistency
lorenzocorallo Jul 7, 2024
dbd3599
refactor: StatsJson.cs and related
lorenzocorallo Jul 7, 2024
6089436
hotfix: distinct SingleCourseJson in BySchoolYearJson.All
lorenzocorallo Jul 7, 2024
e50326f
hotfix: same distinct as BySchoolYearJson for other indexes
lorenzocorallo Jul 7, 2024
abdeff7
fix: remove Location from Distinct if not necessary
lorenzocorallo Jul 7, 2024
1f9919a
refactor: remove unused Ranking methods
lorenzocorallo Jul 7, 2024
5cce969
refactor: remove unused RankingOrder method
lorenzocorallo Jul 7, 2024
1dcef86
refactor: RankingsSet.cs
lorenzocorallo Jul 7, 2024
c4f14db
chore: format RankingSummary.cs
lorenzocorallo Jul 7, 2024
4d9f2f8
refactor: HashMatricoleWrite.cs
lorenzocorallo Jul 7, 2024
4ee5e5f
chore: compact class init and method call
lorenzocorallo Jul 7, 2024
d177b71
refactor: RankingSummaryStudent.cs
lorenzocorallo Jul 7, 2024
53c303d
refactor: StudentHashSummary.cs
lorenzocorallo Jul 7, 2024
2217759
refactor: Scraper/Program.cs
lorenzocorallo Jul 7, 2024
8087560
refactor: CheckUrlUtil.cs
lorenzocorallo Jul 7, 2024
971c8d6
refactor: ScraperOutput.cs
lorenzocorallo Jul 7, 2024
69e2b1b
chore: remove unused import
lorenzocorallo Jul 7, 2024
49d8ca9
chore: inspect code (#169)
angeousta Jul 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib_CSharp
1 change: 1 addition & 0 deletions PoliNetwork.Graduatorie.Common/Data/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ public static class Constants
public const string HtmlFolder = "html";
public const string OutputLinksFilename = "links.txt";
public const string DataFolder = "data";
public const string LocationPlaceholder = "0";
}
25 changes: 23 additions & 2 deletions PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace PoliNetwork.Graduatorie.Common.Objects;
[JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))]
public class EnrollType
{
public bool? CanEnroll;
public bool CanEnroll;
public string? Course;
public string? Type;

Expand All @@ -20,8 +20,29 @@ public int GetHashWithoutLastUpdate()
var i = "EnrollTypeNotNull".GetHashCode();
i ^= Course?.GetHashCode() ?? "Course".GetHashCode();
i ^= Type?.GetHashCode() ?? "Type".GetHashCode();
i ^= CanEnroll?.GetHashCode() ?? "CanEnroll".GetHashCode();

return i;
}

/// <summary>
/// Builds an <see cref="EnrollType"/> from the raw enrollment values of a row.
/// </summary>
/// <param name="rowCanEnrollInto">
/// Raw text describing what the row can enroll into; may be null or empty.
/// When it contains the <c>" - "</c> separator and one of its segments is
/// "assegnato" or "prenotato" (case-insensitive), that segment becomes
/// <see cref="Type"/> and the remaining segments, re-joined with the same
/// separator, become <see cref="Course"/>.
/// </param>
/// <param name="rowCanEnroll">Whether enrollment is allowed for the row.</param>
/// <returns>
/// An instance with <see cref="CanEnroll"/> equal to <paramref name="rowCanEnroll"/>;
/// <see cref="Course"/> and <see cref="Type"/> are null when they cannot be derived.
/// </returns>
public static EnrollType From(string? rowCanEnrollInto, bool rowCanEnroll)
{
    if (!rowCanEnroll)
        return new EnrollType { CanEnroll = false, Course = null, Type = null };

    if (string.IsNullOrEmpty(rowCanEnrollInto))
        return new EnrollType { CanEnroll = true, Course = null, Type = null };

    string[] tester = { "assegnato", "prenotato" };
    const string sep = " - ";

    // Compare case-insensitively without lowering the whole input: avoids an
    // allocation per check and does not depend on the current culture.
    var mentionsType = tester.Any(t => rowCanEnrollInto.Contains(t, StringComparison.OrdinalIgnoreCase));
    if (!rowCanEnrollInto.Contains(sep, StringComparison.Ordinal) || !mentionsType)
        return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null };

    var s = rowCanEnrollInto.Split(sep).ToList();

    // The type must be a whole segment; a keyword that only appears inside a
    // longer segment leaves Type null and the full text ends up in Course.
    var type = s.FirstOrDefault(x => tester.Any(t => string.Equals(t, x, StringComparison.OrdinalIgnoreCase)));

    if (type != null) s.Remove(type);

    var course = string.Join(sep, s);
    return new EnrollType { CanEnroll = true, Course = course, Type = type };
}
}
32 changes: 0 additions & 32 deletions PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs

This file was deleted.

8 changes: 4 additions & 4 deletions PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public static partial class HashMatricola
return string.IsNullOrEmpty(s) ? null : NotAlphaNumericRegex().Replace(s, "");
}

public static string? HashMatricolaMethod(string? input)
public static string? Get(string? input)
{
input = CleanInput(input);

Expand All @@ -33,9 +33,9 @@ public static partial class HashMatricola

var stringInputWithSalt = input + SaltGlobal;
var hexHash = GetSha256(stringInputWithSalt);
var hashMatricolaMethod = hexHash[..MaxCharHash];
var matricolaMethod = hashMatricolaMethod.ToLower();
return matricolaMethod;
var shortHash = hexHash[..MaxCharHash];
var lowerShortHash = shortHash.ToLower();
return lowerShortHash;
}

private static string GetSha256(string stringInputWithSalt)
Expand Down
7 changes: 3 additions & 4 deletions PoliNetwork.Graduatorie.Parser/Main/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ public static void Main(string[] args)
var rankingsUrls = Scraper.Main.Program.RankingsUrls(mt, argsConfig);

// esegui ciò che fa il parser (parse + write)
ParserDo(argsConfig, rankingsUrls);
RunParser(argsConfig, rankingsUrls);
}

private static void ParserDo(ArgsConfig argsConfig, IEnumerable<RankingUrl> rankingsUrls)
private static void RunParser(ArgsConfig argsConfig, IEnumerable<RankingUrl> rankingsUrls)
{
// ricava un unico set partendo dai file html salvati, dagli url
// trovati e dal precedente set salvato nel .json
Expand All @@ -35,7 +35,6 @@ private static void ParserDo(ArgsConfig argsConfig, IEnumerable<RankingUrl> rank
var dateFound = DateFoundUtil.GetDateFound(argsConfig, rankingsSet);

// salvare il set
var writer = new OutputWriteUtil(argsConfig);
writer.SaveOutputs(rankingsSet, dateFound);
new OutputWriteUtil(argsConfig).SaveOutputs(rankingsSet, dateFound);
}
}
6 changes: 3 additions & 3 deletions PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ public void WriteToFile(string dataFolder)
return date1 < date2 ? date1 : date2;
}

public void UpdateDateFound(Ranking variable)
public void UpdateDateFound(Ranking ranking)
{
var path = variable.GetPath().Trim();
var minDateTime = GetMinTime(variable, path);
var path = ranking.GetFullPath().Trim();
var minDateTime = GetMinTime(ranking, path);
SetDate(path, minDateTime);
}

Expand Down
117 changes: 5 additions & 112 deletions PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using PoliNetwork.Graduatorie.Common.Data;
using PoliNetwork.Graduatorie.Common.Objects;
using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific;
using PoliNetwork.Graduatorie.Parser.Objects.RankingNS;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit;

#endregion

Expand All @@ -19,122 +16,18 @@ public abstract class IndexJsonBase
{
public DateTime? LastUpdate;

public void WriteToFile(string outFolder, string pathFile)
public void Write(string outFolder, string pathFile)
{
var mainJsonPath = Path.Join(outFolder, pathFile);
var mainJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings);
File.WriteAllText(mainJsonPath, mainJsonString);
}


/// <summary>
/// Writes one JSON file per ranking, placing each under
/// <c>outFolder/&lt;school&gt;/&lt;year&gt;</c>. Rankings whose school or year
/// is null are skipped. Does nothing when <paramref name="set"/> is null.
/// </summary>
public static void WriteSingleJsons(RankingsSet? set, string outFolder, ArgsConfig argsConfig)
{
    if (set == null)
        return;

    // first level: one group per school
    foreach (var perSchool in set.Rankings.GroupBy(item => item.School))
    {
        if (perSchool.Key is null)
            continue;

        var schoolValue = perSchool.Key.Value;

        // second level: one group per year inside the school
        foreach (var perYear in perSchool.GroupBy(item => item.Year))
        {
            if (perYear.Key is null)
                continue;

            var yearValue = perYear.Key.Value;
            var targetFolder = Path.Join(outFolder, schoolValue.ToString(), yearValue.ToString());
            Directory.CreateDirectory(targetFolder);

            foreach (var item in perYear)
            {
                WriteSingleJsonRanking(targetFolder, item, argsConfig);
            }
        }
    }
}

/// <summary>
/// Serializes <paramref name="ranking"/> to a JSON file inside <paramref name="folder"/>.
/// The write is skipped when the file on disk already matches the ranking,
/// unless <c>argsConfig.ForceReparsing</c> is set.
/// </summary>
private static void WriteSingleJsonRanking(string folder, Ranking ranking, ArgsConfig argsConfig)
{
    var destination = Path.Join(folder, ranking.GetFilename());

    // The on-disk comparison always runs first; the force flag is only
    // consulted when the stored file is already up to date.
    var alreadyUpToDate = ExitIfAlreadyExistsAndNotUpdated(ranking, destination);
    var mustWrite = !alreadyUpToDate || argsConfig.ForceReparsing;
    if (!mustWrite)
    {
        return;
    }

    File.WriteAllText(destination, JsonConvert.SerializeObject(ranking, Culture.JsonSerializerSettings));
}

/// <summary>
/// Returns true when a file exists at <paramref name="path"/>, it deserializes
/// to a non-null ranking, and that ranking has the same hash (ignoring last
/// update) as <paramref name="a"/>; false otherwise.
/// </summary>
private static bool ExitIfAlreadyExistsAndNotUpdated(Ranking a, string path)
{
    if (File.Exists(path))
    {
        var stored = GetRankingFromFile(path);
        if (stored != null)
        {
            return SameHash(a, stored);
        }
    }

    return false;
}

/// <summary>
/// Tells whether two rankings produce the same value from
/// <c>GetHashWithoutLastUpdate()</c>.
/// </summary>
private static bool SameHash(Ranking a, Ranking b)
    => a.GetHashWithoutLastUpdate() == b.GetHashWithoutLastUpdate();

private static bool SameHashCourse(IReadOnlyCollection<CourseTable>? aTableCourse,
IReadOnlyCollection<CourseTable>? bTableCourse)
public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder)
{
if (aTableCourse == null && bTableCourse == null)
return true;
if (aTableCourse == null || bTableCourse == null)
return false;

if (aTableCourse.Count != bTableCourse.Count)
return false;

var aHash = aTableCourse.Select(variable =>
{
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
return hashWithoutLastUpdate;
}).ToList();

var bHash = bTableCourse.Select(variable =>
{
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
return hashWithoutLastUpdate;
}).ToList();

var ai = aHash;
var bi = bHash;

return ai == bi;
}

/// <summary>
/// Compares two merit tables by their <c>GetHashWithoutLastUpdate()</c> value.
/// Two null tables are considered equal; a null and a non-null table are not.
/// </summary>
private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit)
{
    // When the first table is missing, equality depends only on the second one.
    if (aTableMerit == null)
    {
        return bTableMerit == null;
    }

    if (bTableMerit == null)
    {
        return false;
    }

    return aTableMerit.GetHashWithoutLastUpdate() == bTableMerit.GetHashWithoutLastUpdate();
}

/// <summary>
/// Reads the file at <paramref name="path"/> and deserializes its content
/// into a <c>Ranking</c> using <c>Culture.JsonSerializerSettings</c>.
/// </summary>
/// <returns>The deserialized ranking, or null when deserialization yields null.</returns>
private static Ranking? GetRankingFromFile(string path)
    => JsonConvert.DeserializeObject<Ranking>(File.ReadAllText(path), Culture.JsonSerializerSettings);

public static void IndexesWrite(RankingsSet? rankingsSet, string outFolder, ArgsConfig argsConfig)
{
//let's write all single json files
WriteSingleJsons(rankingsSet, outFolder, argsConfig);

//now let's write each single different index
BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.PathCustom);
ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.PathCustom);
BySchoolYearCourseJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearCourseJson.PathCustom);
BySchoolYearJson.From(rankingsSet).Write(outFolder, BySchoolYearJson.CustomPath);
ByYearSchoolJson.From(rankingsSet).Write(outFolder, ByYearSchoolJson.CustomPath);
BySchoolYearCourseJson.From(rankingsSet).Write(outFolder, BySchoolYearCourseJson.CustomPath);
}
}
Loading
Loading