diff --git a/Lib_CSharp b/Lib_CSharp index 6b7ed8c0..e53d6712 160000 --- a/Lib_CSharp +++ b/Lib_CSharp @@ -1 +1 @@ -Subproject commit 6b7ed8c04f1728570c2f7575bc1ceab2b713f04b +Subproject commit e53d671278c21d349835af31394d7bc42be49e7b diff --git a/PoliNetwork.Graduatorie.Common/Data/Constants.cs b/PoliNetwork.Graduatorie.Common/Data/Constants.cs index 121af72b..1edaff74 100644 --- a/PoliNetwork.Graduatorie.Common/Data/Constants.cs +++ b/PoliNetwork.Graduatorie.Common/Data/Constants.cs @@ -9,4 +9,5 @@ public static class Constants public const string HtmlFolder = "html"; public const string OutputLinksFilename = "links.txt"; public const string DataFolder = "data"; + public const string LocationPlaceholder = "0"; } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs index 97671355..09d44d99 100644 --- a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs +++ b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs @@ -11,7 +11,7 @@ namespace PoliNetwork.Graduatorie.Common.Objects; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class EnrollType { - public bool? CanEnroll; + public bool CanEnroll; public string? Course; public string? Type; @@ -20,8 +20,29 @@ public int GetHashWithoutLastUpdate() var i = "EnrollTypeNotNull".GetHashCode(); i ^= Course?.GetHashCode() ?? "Course".GetHashCode(); i ^= Type?.GetHashCode() ?? "Type".GetHashCode(); - i ^= CanEnroll?.GetHashCode() ?? "CanEnroll".GetHashCode(); return i; } + + public static EnrollType From(string? rowCanEnrollInto, bool rowCanEnroll) + { + if (rowCanEnroll == false) + return new EnrollType { CanEnroll = false, Course = null, Type = null }; + + if (string.IsNullOrEmpty(rowCanEnrollInto)) + return new EnrollType { CanEnroll = true, Course = null, Type = null }; + + string[] tester = { "assegnato", "prenotato" }; + const string sep = " - "; + if (!rowCanEnrollInto.Contains(sep) || !tester.Any(t => rowCanEnrollInto.ToLower().Contains(t))) + return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null }; + + var s = rowCanEnrollInto.Split(sep).ToList(); + var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); + + if (type != null) s.Remove(type); + + var course = string.Join(sep, s); + return new EnrollType { CanEnroll = true, Course = course, Type = type }; + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs deleted file mode 100644 index a9bf26a7..00000000 --- a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs +++ /dev/null @@ -1,32 +0,0 @@ -#region - -using PoliNetwork.Graduatorie.Common.Objects; - -#endregion - -namespace PoliNetwork.Graduatorie.Common.Utils; - -public static class EnrollUtil -{ - public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnroll) - { - if (rowCanEnroll == false) - return new EnrollType { CanEnroll = false, Course = null, Type = null }; - - if (string.IsNullOrEmpty(rowCanEnrollInto)) - return new EnrollType { CanEnroll = true, Course = null, Type = null }; - - string[] tester = { "assegnato", "prenotato" }; - const string sep = " - "; - if (!rowCanEnrollInto.Contains(sep) || !tester.Any(t => rowCanEnrollInto.ToLower().Contains(t))) - return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null }; - - var s = rowCanEnrollInto.Split(sep).ToList(); - var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); - - if (type != null) s.Remove(type); - - var course = string.Join(sep, s); - return new EnrollType { CanEnroll = true, Course = course, Type = type }; - } -} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs index f685c431..6f189ba3 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs @@ -24,7 +24,7 @@ public static partial class HashMatricola return string.IsNullOrEmpty(s) ? null : NotAlphaNumericRegex().Replace(s, ""); } - public static string? HashMatricolaMethod(string? input) + public static string? Get(string? input) { input = CleanInput(input); @@ -33,9 +33,9 @@ public static partial class HashMatricola var stringInputWithSalt = input + SaltGlobal; var hexHash = GetSha256(stringInputWithSalt); - var hashMatricolaMethod = hexHash[..MaxCharHash]; - var matricolaMethod = hashMatricolaMethod.ToLower(); - return matricolaMethod; + var shortHash = hexHash[..MaxCharHash]; + var lowerShortHash = shortHash.ToLower(); + return lowerShortHash; } private static string GetSha256(string stringInputWithSalt) diff --git a/PoliNetwork.Graduatorie.Parser/Main/Program.cs b/PoliNetwork.Graduatorie.Parser/Main/Program.cs index 988ee648..c993f2cd 100644 --- a/PoliNetwork.Graduatorie.Parser/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Parser/Main/Program.cs @@ -22,10 +22,10 @@ public static void Main(string[] args) var rankingsUrls = Scraper.Main.Program.RankingsUrls(mt, argsConfig); // esegui ciò che fa il parser (parse + write) - ParserDo(argsConfig, rankingsUrls); + RunParser(argsConfig, rankingsUrls); } - private static void ParserDo(ArgsConfig argsConfig, IEnumerable rankingsUrls) + private static void RunParser(ArgsConfig argsConfig, IEnumerable rankingsUrls) { // ricava un unico set partendo dai file html salvati, dagli url // trovati e dal precedente set salvato nel .json @@ -35,7 +35,6 @@ private static void ParserDo(ArgsConfig argsConfig, IEnumerable rank var dateFound = DateFoundUtil.GetDateFound(argsConfig, rankingsSet); // salvare il set - var writer = new OutputWriteUtil(argsConfig); - writer.SaveOutputs(rankingsSet, dateFound); + new OutputWriteUtil(argsConfig).SaveOutputs(rankingsSet, dateFound); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs index 962fa1c1..faed551f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs @@ -39,10 +39,10 @@ public void WriteToFile(string dataFolder) return date1 < date2 ? date1 : date2; } - public void UpdateDateFound(Ranking variable) + public void UpdateDateFound(Ranking ranking) { - var path = variable.GetPath().Trim(); - var minDateTime = GetMinTime(variable, path); + var path = ranking.GetFullPath().Trim(); + var minDateTime = GetMinTime(ranking, path); SetDate(path, minDateTime); } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index 525b664c..f685e341 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -3,11 +3,8 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using PoliNetwork.Graduatorie.Common.Data; -using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; #endregion @@ -19,122 +16,18 @@ public abstract class IndexJsonBase { public DateTime? LastUpdate; - public void WriteToFile(string outFolder, string pathFile) + public void Write(string outFolder, string pathFile) { var mainJsonPath = Path.Join(outFolder, pathFile); var mainJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); File.WriteAllText(mainJsonPath, mainJsonString); } - - public static void WriteSingleJsons(RankingsSet? set, string outFolder, ArgsConfig argsConfig) - { - if (set == null) - return; - - // group rankings by year - var bySchool = set.Rankings.GroupBy(r => r.School); - foreach (var schoolGroup in bySchool) - { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; - - var byYears = schoolGroup.GroupBy(r => r.Year); - foreach (var yearGroup in byYears) - { - if (yearGroup.Key is null) - continue; - var year = yearGroup.Key.Value; - var folder = Path.Join(outFolder, school.ToString(), year.ToString()); - Directory.CreateDirectory(folder); - - foreach (var ranking in yearGroup) WriteSingleJsonRanking(folder, ranking, argsConfig); - } - } - } - - private static void WriteSingleJsonRanking(string folder, Ranking ranking, ArgsConfig argsConfig) - { - var path = Path.Join(folder, ranking.GetFilename()); - - if (ExitIfAlreadyExistsAndNotUpdated(ranking, path) && !argsConfig.ForceReparsing) return; - - var rankingJsonString = JsonConvert.SerializeObject(ranking, Culture.JsonSerializerSettings); - File.WriteAllText(path, rankingJsonString); - } - - private static bool ExitIfAlreadyExistsAndNotUpdated(Ranking a, string path) - { - if (!File.Exists(path)) return false; - var b = GetRankingFromFile(path); - return b != null && SameHash(a, b); - } - - private static bool SameHash(Ranking a, Ranking b) - { - var ai = a.GetHashWithoutLastUpdate(); - var bi = b.GetHashWithoutLastUpdate(); - return ai == bi; - } - - private static bool SameHashCourse(IReadOnlyCollection? aTableCourse, - IReadOnlyCollection? bTableCourse) + public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder) { - if (aTableCourse == null && bTableCourse == null) - return true; - if (aTableCourse == null || bTableCourse == null) - return false; - - if (aTableCourse.Count != bTableCourse.Count) - return false; - - var aHash = aTableCourse.Select(variable => - { - var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - return hashWithoutLastUpdate; - }).ToList(); - - var bHash = bTableCourse.Select(variable => - { - var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - return hashWithoutLastUpdate; - }).ToList(); - - var ai = aHash; - var bi = bHash; - - return ai == bi; - } - - private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit) - { - if (aTableMerit == null && bTableMerit == null) - return true; - if (aTableMerit == null || bTableMerit == null) - return false; - - var ai = aTableMerit.GetHashWithoutLastUpdate(); - var bi = bTableMerit.GetHashWithoutLastUpdate(); - return ai == bi; - } - - private static Ranking? GetRankingFromFile(string path) - { - var x = File.ReadAllText(path); - - var j = JsonConvert.DeserializeObject(x, Culture.JsonSerializerSettings); - return j; - } - - public static void IndexesWrite(RankingsSet? rankingsSet, string outFolder, ArgsConfig argsConfig) - { - //let's write all single json files - WriteSingleJsons(rankingsSet, outFolder, argsConfig); - //now let's write each single different index - BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.PathCustom); - ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.PathCustom); - BySchoolYearCourseJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearCourseJson.PathCustom); + BySchoolYearJson.From(rankingsSet).Write(outFolder, BySchoolYearJson.CustomPath); + ByYearSchoolJson.From(rankingsSet).Write(outFolder, ByYearSchoolJson.CustomPath); + BySchoolYearCourseJson.From(rankingsSet).Write(outFolder, BySchoolYearCourseJson.CustomPath); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index f7f51b21..d41629b2 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -1,44 +1,52 @@ #region +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; -using PoliNetwork.Graduatorie.Common.Utils.ParallelNS; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using SchoolsDict = + SortedDictionary>>>>; +using YearsDict = SortedDictionary>>>; +using CoursesDict = SortedDictionary>>; +using CourseDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class BySchoolYearCourseJson : IndexJsonBase { - internal const string PathCustom = "bySchoolYearCourse.json"; + internal const string CustomPath = "bySchoolYearCourse.json"; + public List All = new(); // decide whether to include it in the json serialization //keys: school, year, course, location - public SortedDictionary< - SchoolEnum, - SortedDictionary>>> - > Schools = new(); + public SchoolsDict Schools = new(); - public static BySchoolYearCourseJson? From(RankingsSet? set) + public static BySchoolYearCourseJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => new { r.Id, r.Location }) + .ToList(); + + list.Sort(); + mainJson.All = list; + // group rankings by school - var bySchool = set.Rankings.GroupBy(r => r.School); + var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); + foreach (var schoolGroup in bySchool) { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; + var school = schoolGroup.Key; - var byYears = schoolGroup.GroupBy(r => r.Year); + var byYears = schoolGroup.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); var yearsDict = GetYearsDict(byYears); mainJson.Schools.Add(school, yearsDict); @@ -48,249 +56,67 @@ public SortedDictionary< return mainJson; } - private static SortedDictionary< - int, - SortedDictionary>> - > GetYearsDict(IEnumerable> byYears) - { - var d = - new SortedDictionary>>>(); - - foreach (var yearGroup in byYears) GetYearsDictSingle(yearGroup, d); - - return d; - } - - private static void GetYearsDictSingle(IGrouping yearGroup, - IDictionary>>> d) - { - if (yearGroup.Key != null) d.Add(yearGroup.Key.Value, GetCoursesDict(yearGroup)); - } - - private static SortedDictionary>> GetCoursesDict( - IEnumerable yearGroup - ) + private static YearsDict GetYearsDict(IEnumerable> byYears) { - var d = new SortedDictionary>>(); + var yearsDict = new YearsDict(); - foreach (var ranking in yearGroup) + foreach (var yearGroup in byYears) { - if (ranking.ByCourse == null) - continue; - - var byTitle = ranking.ByCourse.GroupBy(c => c.Title); - foreach (var courseGroup in byTitle) - AddCourseToDict(d, ranking, courseGroup); + var coursesDict = GetCoursesDict(yearGroup); + yearsDict.Add(yearGroup.Key, coursesDict); } - return d; + return yearsDict; } - private static void AddCourseToDict( - IDictionary>> d, - Ranking ranking, - IGrouping courseGroup - ) + private static CoursesDict GetCoursesDict(IEnumerable yearGroup) { - var title = courseGroup.Key; - if (string.IsNullOrEmpty(title)) - return; + var coursesDict = new CoursesDict(); - if (!d.ContainsKey(title)) - d[title] = new SortedDictionary>(); - - var courseDict = d[title]; - foreach (var course in courseGroup) + foreach (var ranking in yearGroup) { - var location = course.Location; - - // fixedLocation - // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello - // deve comparire nella lista - // fix: se un corso non ha location, si inserisce un valore 0 - var fixedLocation = string.IsNullOrEmpty(location) ? "0" : location; - - if (!courseDict.ContainsKey(fixedLocation)) - courseDict[fixedLocation] = new List(); - - var locationDict = courseDict[fixedLocation]; - var singleCourseJson = CreateCourseJson(ranking, course); - - if (locationDict.Any(IsThisCourse)) + if (ranking.ByCourse == null) continue; - locationDict.Add(singleCourseJson); - locationDict.Sort(Comparison); - continue; - - bool IsThisCourse(SingleCourseJson x) - { - return x.Link == singleCourseJson.Link && x.Location == singleCourseJson.Location; - } - } - } - - private static int Comparison(SingleCourseJson x, SingleCourseJson y) - { - return x.CompareTo(y); - } - - private static SingleCourseJson CreateCourseJson(Ranking ranking, CourseTable course) - { - var basePath = ranking.School + "/" + ranking.Year + "/"; - return new SingleCourseJson - { - Link = ranking.GetFilename(), - Id = ranking.GetId(), - BasePath = basePath, - Year = ranking.Year, - School = ranking.School, - Location = course.Location, - RankingOrder = ranking.RankingOrder - }; - } - - private static bool IsSimilar(IEnumerable yearGroup, SingleCourseJson singleCourseJson) - { - var enumerable = yearGroup.Where(v1 => v1.ByCourse != null); - - return enumerable.Any(Predicate); - - bool Predicate(Ranking v1) - { - return singleCourseJson.School == v1.School - && singleCourseJson.Year == v1.Year - && singleCourseJson.RankingOrder?.GetId() == v1.RankingOrder?.GetId(); - } - } - - public static RankingsSet? Parse(string dataFolder) - { - var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); - try - { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) - return null; - - var rankings = RankingsAdd(mainJson, outFolder); - - return new RankingsSet { LastUpdate = mainJson.LastUpdate, Rankings = rankings }; - } - catch - { - // ignored - } + var byTitle = + ranking.ByCourse.Where(c => c.Title != null).GroupBy(c => c.Title!); // e.g. INGEGNERIA AEROSPAZIALE - return null; - } - - private static List RankingsAdd(BySchoolYearCourseJson mainJson, string outFolder) - { - List rankings = new(); - foreach (var school in mainJson.Schools) - foreach (var year in school.Value) - RankingsAddSingleYearSchool(year, outFolder, school, rankings); - - return rankings; - } - - private static void RankingsAddSingleYearSchool( - KeyValuePair>>> year, - string outFolder, - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - ICollection rankings - ) - { - var actions = new List(); - foreach (var filename in year.Value) - { - var collection = filename.Value.Select(Selector); - actions.AddRange(collection); - continue; - - Action Selector(KeyValuePair> variable) + foreach (var courseGroup in byTitle) { - return () => { RankingAdd(school, year, outFolder, variable, rankings); }; + var alreadyExisted = coursesDict.ContainsKey(courseGroup.Key); + var courseDict = alreadyExisted + ? coursesDict[courseGroup.Key] + : new CourseDict(); + + foreach (var courseTable in courseGroup) + { + var location = courseTable.GetFixedLocation(); + if (!courseDict.ContainsKey(location)) + { + // first time this location is encountered, + // so we instantiate the list for this location + var newLocationList = new List(); + courseDict.Add(location, newLocationList); + } + + var locationList = courseDict.GetValueOrDefault(location); + if (locationList == null) + throw new UnreachableException(); // this should never happen at this point + + var singleCourseJson = SingleCourseJson.From(ranking, courseTable); + + if (locationList.Any( + x => x.Id == singleCourseJson.Id && x.Location == singleCourseJson.Location)) + continue; + + locationList.Add(singleCourseJson); + locationList.Sort(); + } + + if (!alreadyExisted) coursesDict.Add(courseGroup.Key, courseDict); } } - ParallelRun.Run(actions.ToArray()); - } - - private static void RankingAdd( - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - KeyValuePair>>> year, - string outFolder, - KeyValuePair> filename, - ICollection rankings - ) - { - foreach (var variable in filename.Value) - RankingAddSingle(school, year, outFolder, rankings, variable); - } - - private static void RankingAddSingle( - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - KeyValuePair>>> year, - string outFolder, - ICollection rankings, - SingleCourseJson variable - ) - { - var schoolKey = school.Key.ToString(); - var yearKey = year.Key.ToString(); - var path = Path.Join(outFolder, schoolKey, yearKey, variable.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking == null) - return; - - lock (rankings) - { - AddToRankings(rankings, ranking); - } - } - - private static void AddToRankings(ICollection rankings, Ranking ranking) - { - var any = rankings.Any( - x => - x.School == ranking.School - && x.Year == ranking.Year - && Similar(x.ByCourse, ranking.ByCourse) - ); - - if (!any) - rankings.Add(ranking); - } - - private static bool Similar( - IReadOnlyCollection? a, - IReadOnlyCollection? b - ) - { - if (a == null || b == null) - return false; - return a.Count == b.Count && a.Select(Selector).All(Predicate); - - bool Selector(CourseTable variable) - { - return b.Any(x => x.Title == variable.Title); - } - - bool Predicate(bool boolB) - { - return boolB; - } + return coursesDict; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index d93cd3fa..fd5a1c5f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -10,80 +10,74 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using SchoolsDict = SortedDictionary>>; +using YearsDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class BySchoolYearJson : IndexJsonBase { - internal const string PathCustom = "bySchoolYear.json"; + internal const string CustomPath = "bySchoolYear.json"; + public List All = new(); // decide whether to include it in the json serialization - public SortedDictionary>> Schools = new(); + public SchoolsDict Schools = new(); - public static BySchoolYearJson? From(RankingsSet? set) + public static BySchoolYearJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new BySchoolYearJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => new { r.Id }) + .ToList(); + + list.Sort(); + mainJson.All = list; + // group rankings by school - var bySchool = set.Rankings.GroupBy(r => r.School); + var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); foreach (var schoolGroup in bySchool) { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; + var school = schoolGroup.Key; + var byYears = schoolGroup.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); - var schoolDict = new SortedDictionary>(); - - var byYears = schoolGroup.GroupBy(r => r.Year); - foreach (var yearGroup in byYears) - { - if (yearGroup.Key is null) - continue; - AddSchool(yearGroup, schoolDict); - } - - mainJson.Schools.Add(school, schoolDict); + var yearsDict = GetYearsDict(byYears); + mainJson.Schools.Add(school, yearsDict); } return mainJson; } - private static void AddSchool( - IGrouping yearGroup, - IDictionary> schoolDict - ) + private static YearsDict GetYearsDict(IEnumerable> byYears) { - var yearGroupKey = yearGroup.Key; - if (yearGroupKey == null) - return; + var yearsDict = new YearsDict(); - var singleCourseJsons = yearGroup - .SelectMany(ranking => ranking.ToSingleCourseJson()) - .DistinctBy(x => x.Link) - .ToList(); - var filenames = singleCourseJsons - .OrderBy(a => a.Id) - .ThenBy(a => a.Year) - .ThenBy(a => a.School) - .ThenBy(a => a.BasePath) - .ToList(); + foreach (var yearGroup in byYears) + { + var singleCourseJsons = yearGroup + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => r.Id) + .OrderBy(e => e.Id) // Id contains everything (school, year, pri/sec phase, extraeu, lang) + .ToList(); - schoolDict.Add(yearGroupKey.Value, filenames); - } + yearsDict.Add(yearGroup.Key, singleCourseJsons); + } + return yearsDict; + } public static RankingsSet GetAndParse(string dataFolder) { var set = new RankingsSet(); var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); + var mainJsonPath = Path.Join(outFolder, CustomPath); try { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) return set; + var index = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); + if (index is null) return set; - set.LastUpdate = mainJson.LastUpdate; - set.Rankings = GetRankingsFromIndex(mainJson, outFolder); + set.LastUpdate = index.LastUpdate; + set.Rankings = index.GetRankings(outFolder); set.Rankings.Sort(); return set; } @@ -94,37 +88,9 @@ public static RankingsSet GetAndParse(string dataFolder) } } - private static List GetRankingsFromIndex(BySchoolYearJson mainJson, string outFolder) - { - List rankings = new(); - var singleCourseJsons = GetSingleCourseJsons(mainJson).ToList(); - singleCourseJsons.Sort(); - foreach (var filename in singleCourseJsons) - AddRanking(outFolder, filename, rankings); - - return rankings; - } - - private static IEnumerable GetSingleCourseJsons(BySchoolYearJson mainJson) - { - var singleCourseJsons = mainJson.Schools.SelectMany( - school => - { - var courseJsons = school.Value.SelectMany(year => - { - var yearValue = year.Value; - return yearValue; - }); - return courseJsons; - }); - return singleCourseJsons; - } - - private static void AddRanking(string outFolder, SingleCourseJson filename, ICollection rankings) + public List GetRankings(string outFolder) { - var path = Path.Join(outFolder, filename.BasePath, filename.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking == null) return; - rankings.Add(ranking); + return All.Select(singleCourseJson => singleCourseJson.GetFullPath(outFolder)).Select(Ranking.FromJson) + .OfType().ToList(); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index 7206836c..d1e973b9 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -2,7 +2,6 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; @@ -10,94 +9,58 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using YearsDict = SortedDictionary>>; +using SchoolsDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class ByYearSchoolJson : IndexJsonBase { - internal const string PathCustom = "byYearSchool.json"; + internal const string CustomPath = "byYearSchool.json"; + public List All = new(); // decide whether to include it in the json serialization - public SortedDictionary>> Years = new(); + public YearsDict Years = new(); - public static ByYearSchoolJson? From(RankingsSet? set) + public static ByYearSchoolJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new ByYearSchoolJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()).ToList() + .DistinctBy(r => new { r.Id }) + .ToList(); + + list.Sort(); + mainJson.All = list; + // group rankings by year - var byYear = set.Rankings.GroupBy(r => r.Year); + var byYear = set.Rankings.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); foreach (var yearGroup in byYear) { - if (yearGroup.Key is null) - continue; - var year = yearGroup.Key.Value; + var year = yearGroup.Key; + var bySchools = yearGroup.Where(r => r.School != null).GroupBy(r => r.School!.Value); - var yearDict = new SortedDictionary>(); - - var bySchools = yearGroup.GroupBy(r => r.School); - foreach (var schoolGroup in bySchools) - { - if (schoolGroup.Key is null) - continue; - var filenames = schoolGroup - .SelectMany(ranking => ranking.ToSingleCourseJson()) - .DistinctBy(x => x.Link) - .ToList().OrderBy(a => a.Id); - yearDict.Add(schoolGroup.Key.Value, filenames); - } - - mainJson.Years.Add(year, yearDict); + var schoolsDict = GetSchoolsDict(bySchools); + mainJson.Years.Add(year, schoolsDict); } return mainJson; } - - public static RankingsSet? Parse(string dataFolder) + private static SchoolsDict GetSchoolsDict(IEnumerable> bySchools) { - var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); - try + var schoolsDict = new SchoolsDict(); + foreach (var schoolGroup in bySchools) { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) - return null; - - var rankings = RankingsAdd(mainJson, outFolder); + var filenames = schoolGroup + .SelectMany(ranking => ranking.ToSingleCourseJson()) + .DistinctBy(x => x.Link) + .OrderBy(r => r.Id) + .ToList(); - return new RankingsSet { LastUpdate = mainJson.LastUpdate, Rankings = rankings }; - } - catch - { - // ignored + schoolsDict.Add(schoolGroup.Key, filenames); } - return null; - } - - private static List RankingsAdd(ByYearSchoolJson mainJson, string outFolder) - { - List rankings = new(); - foreach (var year in mainJson.Years) - foreach (var school in year.Value) - foreach (var filename in school.Value) - RankingAdd(year, school, outFolder, filename, rankings); - - return rankings; - } - - private static void RankingAdd( - KeyValuePair>> year, - KeyValuePair> school, - string outFolder, - SingleCourseJson filename, - ICollection rankings) - { - var schoolKey = school.Key.ToString(); - var yearKey = year.Key.ToString(); - var path = Path.Join(outFolder, schoolKey, yearKey, filename.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking != null) - rankings.Add(ranking); + return schoolsDict; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index dcff36b8..ad6842e4 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -46,6 +46,21 @@ public int CompareTo(SingleCourseJson? singleCourseJson) return 0; } + public static SingleCourseJson From(Ranking ranking, CourseTable? course) + { + var basePath = $"{ranking.School}/{ranking.Year}/"; // "Ingegneria/2023" + return new SingleCourseJson + { + Link = ranking.GetFilename(), + Id = ranking.GetId(), + BasePath = basePath, + Year = ranking.Year, + School = ranking.School, + Location = course?.Location, + RankingOrder = ranking.RankingOrder + }; + } + public int GetHashWithoutLastUpdate() { var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); @@ -61,4 +76,9 @@ public bool Is(CourseTable courseTable) { return (RankingOrder?.Phase ?? "") == courseTable.Title; } + + public string GetFullPath(string outFolder = "") + { + return Path.Join(outFolder, BasePath, Link); + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index f3c394c0..1befb2b1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -1,11 +1,13 @@ #region +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd + #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; @@ -14,106 +16,33 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsJson { - private const string PathStats = "stats"; + private const string StatsFolderName = "stats"; public DateTime LastUpdate = DateTime.UtcNow; public SortedDictionary Stats = new(); - public static void Write(string outFolder, RankingsSet? rankingsSet, ArgsConfig argsConfig) - { - var statsJson = Generate(rankingsSet); - if (statsJson == null) return; - foreach (var yearDict in statsJson.Stats) WriteToFileYear(outFolder, yearDict, argsConfig); - } - - private static StatsJson? Generate(RankingsSet? rankingsSet) + public static StatsJson From(RankingsSet rankingsSet) { - if (rankingsSet == null) - return null; - var statsJson = new StatsJson(); - foreach (var ranking in rankingsSet.Rankings) GenerateSingleRanking(rankingsSet, ranking, statsJson); - - foreach (var year in statsJson.Stats.Keys) - foreach (var school in statsJson.Stats[year].Schools.Keys) - { - var statsSingleCourseJsons = - statsJson.Stats[year].Schools[school].List.OrderBy(x => x.SingleCourseJson?.Link); - statsJson.Stats[year].Schools[school].List = statsSingleCourseJsons.ToList(); - } - - return statsJson; - } - private static void GenerateSingleRanking(RankingsSet? rankingsSet, Ranking ranking, StatsJson? statsJson) - { - if (rankingsSet == null) - return; - if (statsJson == null) - return; - - if (ranking.Year == null) return; - if (!statsJson.Stats.ContainsKey(ranking.Year.Value)) + var byYears = rankingsSet.Rankings.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); + foreach (var yearGroup in byYears) { - var statsJsonStat = new StatsYear - { - NumStudents = rankingsSet.Rankings.Where(x => x.Year == ranking.Year) - .Select(x => x.RankingSummary?.HowManyStudents).Sum() - }; - statsJson.Stats[ranking.Year.Value] = statsJsonStat; - } + var statsYear = StatsYear.From(yearGroup.ToList()); - if (ranking.School == null) return; - var schools = statsJson.Stats[ranking.Year.Value].Schools; - if (!schools.ContainsKey(ranking.School.Value)) - { - var statsSchool = new StatsSchool - { - NumStudents = rankingsSet.Rankings - .Where(x => x.Year == ranking.Year && x.School == ranking.School) - .Select(x => x.RankingSummary?.HowManyStudents).Sum() - }; - schools[ranking.School.Value] = statsSchool; + if (statsJson.Stats.ContainsKey(yearGroup.Key)) throw new UnreachableException(); // should be impossible + statsJson.Stats.Add(yearGroup.Key, statsYear); } - var statsSingleCourseJsons = ranking.ToStats().DistinctBy(x => new - { - x.SingleCourseJson?.Link, x.SingleCourseJson?.Location - }); - foreach (var variable in statsSingleCourseJsons) schools[ranking.School.Value].List.Add(variable); - } - - private static void WriteToFileYear(string outFolder, KeyValuePair yearDict, ArgsConfig argsConfig) - { - var statsPath = Path.Join(outFolder, PathStats); - if (!Directory.Exists(statsPath)) Directory.CreateDirectory(statsPath); - - var jsonPath = Path.Join(statsPath, yearDict.Key + ".json"); - if (ExitIfThereIsntAnUpdate(jsonPath, yearDict.Value) && !argsConfig.ForceReparsing) return; - - var jsonString = JsonConvert.SerializeObject(yearDict.Value, Culture.JsonSerializerSettings); - File.WriteAllText(jsonPath, jsonString); + return statsJson; } - private static bool ExitIfThereIsntAnUpdate(string jsonPath, StatsYear variableValue) + public void Write(string outFolder, ArgsConfig argsConfig) { - try - { - if (!File.Exists(jsonPath)) return false; - - var read = File.ReadAllText(jsonPath); - var jsonRead = JsonConvert.DeserializeObject(read, Culture.JsonSerializerSettings); - var hashRead = jsonRead?.GetHashWithoutLastUpdate(); - var hashThis = variableValue.GetHashWithoutLastUpdate(); - - return hashRead == hashThis; - } - catch (Exception ex) - { - Console.WriteLine(ex); - } + var statsFolderPath = Path.Join(outFolder, StatsFolderName); + if (!Directory.Exists(statsFolderPath)) Directory.CreateDirectory(statsFolderPath); - return false; + foreach (var yearStats in Stats.Values) yearStats.Write(statsFolderPath, argsConfig); } public int GetHashWithoutLastUpdate() diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs index 8b2339eb..0a80f736 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs @@ -2,6 +2,9 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion @@ -12,11 +15,28 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; public class StatsSchool { public List List = new(); - public int? NumStudents; + public int NumStudents; + + public static StatsSchool From(IEnumerable pRankings) + { + var statsSchool = new StatsSchool(); + var rankings = pRankings.Where(r => r is { Year: not null, School: not null }).ToList(); + + statsSchool.NumStudents = + rankings.Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum(); + + statsSchool.List = rankings + .SelectMany(r => r.ToStats()) + .DistinctBy(x => new { x.SingleCourseJson.Id, x.SingleCourseJson.Location }) + .OrderBy(x => x.SingleCourseJson.Id) + .ToList(); + + return statsSchool; + } public int GetHashWithoutLastUpdate() { - var i = NumStudents ?? "NumStudents".GetHashCode(); + var i = NumStudents; return List.Aggregate(i, (current, variable) => current ^ variable.GetHashWithoutLastUpdate()); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs index 8b503b81..07ca9f28 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs @@ -12,21 +12,27 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsSingleCourseJson { - public SingleCourseJson? SingleCourseJson; - public RankingSummary? Stats; + public SingleCourseJson SingleCourseJson; + public RankingSummary Stats; + + public StatsSingleCourseJson(SingleCourseJson singleCourseJson, RankingSummary stats) + { + SingleCourseJson = singleCourseJson; + Stats = stats; + } public static List From(Ranking ranking) { var singleCourseJsons = ranking.ToSingleCourseJson(); - return singleCourseJsons.Select(variable => new StatsSingleCourseJson - { SingleCourseJson = variable, Stats = ranking.RankingSummary }).ToList(); + ranking.RankingSummary ??= ranking.CreateSummary(); + return singleCourseJsons.Select(scj => new StatsSingleCourseJson(scj, ranking.RankingSummary)).ToList(); } public int GetHashWithoutLastUpdate() { var i = "StatsSingleCourseJson".GetHashCode(); - i ^= SingleCourseJson?.GetHashWithoutLastUpdate() ?? "SingleCourseJson".GetHashCode(); - i ^= Stats?.GetHashWithoutLastUpdate() ?? "Stats".GetHashCode(); + i ^= SingleCourseJson.GetHashWithoutLastUpdate(); + i ^= Stats.GetHashWithoutLastUpdate(); return i; } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs index a31c7f11..53d4805a 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs @@ -1,23 +1,86 @@ #region +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; +using PoliNetwork.Graduatorie.Common.Objects; +using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; +using SchoolsDict = SortedDictionary; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsYear { - public int? NumStudents; - public SortedDictionary Schools = new(); + public int NumStudents; + public SchoolsDict Schools = new(); + public int Year; + + public static StatsYear From(List rankings) + { + var statsYear = new StatsYear + { + Year = rankings.First(r => r.Year != null).Year!.Value, // just hilarious + NumStudents = rankings.Select(r => (r.RankingSummary ?? r.CreateSummary()).HowManyStudents ?? 0) + .Sum() // this ?? is crazy + }; + + var bySchool = rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); + foreach (var schoolGroup in bySchool) + { + var statsSchool = StatsSchool.From(schoolGroup.ToList()); + + if (statsYear.Schools.ContainsKey(schoolGroup.Key)) + throw new UnreachableException(); // should be impossible, right? + statsYear.Schools.Add(schoolGroup.Key, statsSchool); + } + + return statsYear; + } + + public void Write(string statsFolderPath, ArgsConfig argsConfig) + { + var fullJsonPath = Path.Join(statsFolderPath, $"{Year}.json"); + if (ExitIfThereIsntAnUpdate(fullJsonPath) && !argsConfig.ForceReparsing) return; + + var jsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); + File.WriteAllText(fullJsonPath, jsonString); + } + + private bool ExitIfThereIsntAnUpdate(string fullJsonPath) + { + try + { + if (!File.Exists(fullJsonPath)) return false; + + var saved = File.ReadAllText(fullJsonPath); + var savedStats = JsonConvert.DeserializeObject(saved, Culture.JsonSerializerSettings); + + if (string.IsNullOrEmpty(saved) || savedStats == null) return false; + + var savedHash = savedStats.GetHashWithoutLastUpdate(); + var hash = GetHashWithoutLastUpdate(); + return savedHash == hash; + } + catch (Exception ex) + { + Console.WriteLine(ex); + } + + return false; + } public int GetHashWithoutLastUpdate() { - var i = NumStudents ?? "NumStudents".GetHashCode(); + var i = NumStudents; var enumerable = from variable in Schools let variableKey = (int)variable.Key diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index ddea20f2..2eac3d2f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -3,6 +3,7 @@ using System.Globalization; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; using PoliNetwork.Graduatorie.Parser.Objects.Json; @@ -17,7 +18,7 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS; [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] -public class Ranking : IComparable +public class Ranking : IComparable, IEquatable { public List? ByCourse; public MeritTable? ByMerit; @@ -37,39 +38,28 @@ public int CompareTo(Ranking? other) return string.Compare(GetId(), other.GetId(), StringComparison.Ordinal); } + + public bool Equals(Ranking? other) + { + if (other == null) return false; + return GetHashWithoutLastUpdate() == other.GetHashWithoutLastUpdate(); + } + public RankingSummaryStudent GetRankingSummaryStudent() { return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); } - - /*** - * Ottieni l'hash senza considerare il valore di LastUpdate - */ - public int GetHashWithoutLastUpdate() + public static Ranking? FromJson(string fullPath) { - var i = "Ranking".GetHashCode(); - i ^= Extra?.GetHashCode() ?? "Extra".GetHashCode(); - i ^= RankingOrder?.GetHashWithoutLastUpdate() ?? "RankingOrder".GetHashCode(); - i ^= RankingSummary?.GetHashWithoutLastUpdate() ?? "RankingSummary".GetHashCode(); - i ^= School?.GetHashCode() ?? "School".GetHashCode(); - i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); - i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); - var iMerit = ByMerit?.GetHashWithoutLastUpdate(); - i ^= iMerit ?? "ByMerit".GetHashCode(); - - - if (ByCourse == null) - i ^= "ByCourse".GetHashCode(); - else - i = ByCourse.Aggregate(i, (current, variable) => - { - var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var iList = hashWithoutLastUpdate; - return current ^ iList; - }); - - return i; + // if (!File.Exists(fullPath)) return null; + // + // var str = File.ReadAllText(fullPath); + // var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); + // return ranking; + + // consider merging the two functions at some point + return Utils.Transformer.ParserNS.Parser.ParseJsonRanking(fullPath); } @@ -83,26 +73,6 @@ public bool IsSimilarTo(Ranking ranking) } - public void Merge(Ranking ranking) - { - LastUpdate = LastUpdate > ranking.LastUpdate ? LastUpdate : ranking.LastUpdate; - Year ??= ranking.Year; - Extra ??= ranking.Extra; - School ??= ranking.School; - MergeRankingOrder(ranking); - ByCourse ??= ranking.ByCourse; - ByMerit ??= ranking.ByMerit; - Url ??= ranking.Url; - } - - private void MergeRankingOrder(Ranking ranking) - { - if (RankingOrder == null) - RankingOrder = ranking.RankingOrder; - else - RankingOrder.Merge(ranking.RankingOrder); - } - public string GetFilename() { var id = GetId(); @@ -158,8 +128,67 @@ public RankingSummary CreateSummary() return RankingSummary.From(this); } - public string GetPath() + public string GetBasePath(string outFolder = "") + { + return Path.Join(outFolder, $"{School}/{Year}/"); + } + + public string GetFullPath(string outFolder = "") + { + return Path.Join(GetBasePath(outFolder), GetFilename()); + } + + public void WriteAsJson(string outFolder, bool forceReparse = false) + { + var folderPath = GetBasePath(outFolder); + Directory.CreateDirectory(folderPath); + + var fullPath = GetFullPath(outFolder); + + var savedRanking = FromJson(fullPath); + var equalsSaved = savedRanking != null && Equals(savedRanking); + + if (!forceReparse && equalsSaved) return; + + var rankingJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); + File.WriteAllText(fullPath, rankingJsonString); + } + + /*** + * Ottieni l'hash senza considerare il valore di LastUpdate + */ + public int GetHashWithoutLastUpdate() + { + var i = "Ranking".GetHashCode(); + i ^= Extra?.GetHashCode() ?? "Extra".GetHashCode(); + i ^= RankingOrder?.GetHashWithoutLastUpdate() ?? "RankingOrder".GetHashCode(); + i ^= RankingSummary?.GetHashWithoutLastUpdate() ?? "RankingSummary".GetHashCode(); + i ^= School?.GetHashCode() ?? "School".GetHashCode(); + i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); + i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); + var iMerit = ByMerit?.GetHashWithoutLastUpdate(); + i ^= iMerit ?? "ByMerit".GetHashCode(); + + + if (ByCourse == null) + i ^= "ByCourse".GetHashCode(); + else + i = ByCourse.Aggregate(i, (current, variable) => + { + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); + return current ^ hashWithoutLastUpdate; + }); + + return i; + } + + public override bool Equals(object? obj) + { + return Equals(obj as Ranking); + } + + public override int GetHashCode() { - return School + "/" + Year + "/" + RankingOrder?.Phase; + return GetHashWithoutLastUpdate(); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs index 561908ca..891e274f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs @@ -101,11 +101,4 @@ public int GetHashWithoutLastUpdate() return i; } - - public void Merge(RankingOrder? rankingRankingOrder) - { - Phase ??= rankingRankingOrder?.Phase; - Primary ??= rankingRankingOrder?.Primary; - Secondary ??= rankingRankingOrder?.Secondary; - } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs index 26b09dd8..fb4cc3cc 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs @@ -57,9 +57,7 @@ public static RankingSummary From(Ranking ranking) titleLocation.Location = x.Key.Location; return titleLocation; }); - var tableStatsList = - distinctBy - ?.ToList(); + var tableStatsList = distinctBy?.ToList(); var tableStatsList2 = Get(tableStatsList); var resultsSummarized = new SortedDictionary(keyValuePairs ?? new Dictionary()); return new RankingSummary diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs index fbcae97f..d129cfa4 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs @@ -11,32 +11,24 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class RankingsSet { - public DateTime? LastUpdate; - public List Rankings; + public DateTime? LastUpdate = DateTime.UtcNow; + public List Rankings = new(); - public RankingsSet() + public void Merge(RankingsSet newSet) { - Rankings = new List(); - LastUpdate = DateTime.UtcNow; - } - - - public void AddRanking(Ranking ranking) - { - var alreadyPresent = Contains(ranking); - if (!alreadyPresent) + foreach (var ranking in newSet.Rankings) + { + var alreadyPresent = Rankings.Any(v => v.IsSimilarTo(ranking)); + if (alreadyPresent) continue; Rankings.Add(ranking); - if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) LastUpdate = ranking.LastUpdate; - } - - public bool Contains(Ranking ranking) - { - return Rankings.Any(v => v.IsSimilarTo(ranking)); + if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) + LastUpdate = ranking.LastUpdate; + } } - public void Merge(RankingsSet set) + public void WriteAllRankings(string outFolder, bool forceReparse = false) { - foreach (var ranking in set.Rankings) AddRanking(ranking); + foreach (var ranking in Rankings) ranking.WriteAsJson(outFolder, forceReparse); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs index d9ef3c26..4e6360fa 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs @@ -2,6 +2,7 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; using PoliNetwork.Graduatorie.Parser.Utils.Output; @@ -28,4 +29,18 @@ public RankingSummaryStudent GetRankingSummaryStudent(Ranking ranking) return new RankingSummaryStudent(Title, ranking.RankingOrder?.Phase, ranking.School, ranking.Url, ranking.Year); } + + /// + /// Get the course location if present, otherwise get the placeholder (constant). + /// Useful for index purposes. + /// + /// A string with the location or the placeholder + public string GetFixedLocation() + { + // fixedLocation + // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello + // deve comparire nella lista + // fix: se un corso non ha location, si inserisce un valore 0 + return string.IsNullOrEmpty(Location) ? Constants.LocationPlaceholder : Location; + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs index f64f8c2b..a4566b47 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs @@ -11,19 +11,18 @@ namespace PoliNetwork.Graduatorie.Parser.Utils; public static class DateFoundUtil { - public static DateFound GetDateFound(ArgsConfig argsConfig, RankingsSet? rankingsSet) + public static DateFound GetDateFound(ArgsConfig argsConfig, RankingsSet rankingsSet) { var dateFound = GetDateFoundFromFile(argsConfig.DataFolder); dateFound = UpdateDateFound(rankingsSet, dateFound); return dateFound; } - private static DateFound UpdateDateFound(RankingsSet? rankingsSet, DateFound? dateFound) + private static DateFound UpdateDateFound(RankingsSet rankingsSet, DateFound? dateFound) { dateFound ??= new DateFound(); - var rankingsSetRankings = rankingsSet?.Rankings; - if (rankingsSetRankings == null) return dateFound; + var rankingsSetRankings = rankingsSet.Rankings; foreach (var variable in rankingsSetRankings) dateFound.UpdateDateFound(variable); return dateFound; diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index 76a10f20..80396565 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -2,108 +2,95 @@ using Newtonsoft.Json; using PoliNetwork.Graduatorie.Common.Data; -using PoliNetwork.Graduatorie.Parser.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; #endregion namespace PoliNetwork.Graduatorie.Parser.Utils.Output; -public static class HashMatricoleWrite +using IdsDict = SortedDictionary; + +public class HashMatricoleWrite { - public static void Write(RankingsSet? rankingsSet, string outFolder) - { - if (rankingsSet == null) - return; + private const string FolderName = "hashMatricole"; + private IdsDict _idsDict = new(); - var dictionary = GetDictToWrite(rankingsSet); - Sort2(dictionary); - WriteToFile(dictionary, outFolder); + public static HashMatricoleWrite From(RankingsSet rankingsSet) + { + return new HashMatricoleWrite + { + _idsDict = GetIdsDict(rankingsSet) + }; } - private static void Sort2(SortedDictionary dict) + + public void Write(string outFolder) { - var keys = dict.Keys; - foreach (var key in keys) + Console.WriteLine($"[INFO] Students with id are {_idsDict.Keys.Count}"); + + var groupsDict = GetGroupsDict(); + var hashMatricoleFolder = Path.Join(outFolder, FolderName); + if (!Directory.Exists(hashMatricoleFolder)) Directory.CreateDirectory(hashMatricoleFolder); + + foreach (var (id, idsDict) in groupsDict) { - var item = dict[key]; - item.Sort2(); + var idsDictJson = JsonConvert.SerializeObject(idsDict, Culture.JsonSerializerSettings); + var filename = $"{id}.json"; + var fullPath = Path.Join(hashMatricoleFolder, filename); + File.WriteAllText(fullPath, idsDictJson); } } - private static SortedDictionary GetDictToWrite(RankingsSet rankingsSet) + private static IdsDict GetIdsDict(RankingsSet rankingsSet) { - var dictionary = new SortedDictionary(); + var dictionary = new IdsDict(); foreach (var ranking in rankingsSet.Rankings) { var byMeritRows = ranking.ByMerit?.Rows; if (byMeritRows != null) foreach (var student in byMeritRows.Where(student => !string.IsNullOrEmpty(student.Id))) - AddToDict(dictionary, ranking, student, null); + { + var id = student.Id!; + if (!dictionary.ContainsKey(id)) dictionary.Add(id, new StudentHashSummary()); + dictionary[id].Merge(student, ranking, null); + } var rankingByCourse = ranking.ByCourse; if (rankingByCourse == null) continue; - foreach (var courseTable in rankingByCourse) + foreach (var courseTable in rankingByCourse.Where(c => c.Rows != null)) { - var row = courseTable.Rows; - if (row == null) continue; - foreach (var studentResult in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) - AddToDict(dictionary, ranking, studentResult, courseTable); + var row = courseTable.Rows!; + foreach (var student in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) + { + var id = student.Id!; + + if (!dictionary.ContainsKey(id)) dictionary.Add(id, new StudentHashSummary()); + dictionary[id].Merge(student, ranking, courseTable); + } } } + foreach (var item in dictionary.Values) item.Sort(); + return dictionary; } - private static void WriteToFile(SortedDictionary dictionary, string outFolder) + private SortedDictionary GetGroupsDict() { - Console.WriteLine($"[INFO] Students with id are {dictionary.Keys.Count}"); - + var groupsDict = new SortedDictionary(); + var groups = _idsDict.GroupBy(pair => pair.Key[..2]); - var dictResult = - new SortedDictionary>(); - - foreach (var variable in dictionary) + foreach (var group in groups) { - var key = variable.Key[..2]; - if (!dictResult.ContainsKey(key)) - dictResult[key] = new SortedDictionary(); - - if (!dictResult[key].ContainsKey(variable.Key)) - dictResult[key][variable.Key] = variable.Value; - } + var groupId = group.Key; + var groupVal = group.ToList(); - var hashMatricole = outFolder + "/hashMatricole"; - if (!Directory.Exists(hashMatricole)) Directory.CreateDirectory(hashMatricole); + var groupIdsDict = new IdsDict(); + foreach (var (id, studentHashSummary) in groupVal) groupIdsDict.Add(id, studentHashSummary); - foreach (var variable in dictResult) WriteSingleHashFile(variable, hashMatricole); - } - - private static void WriteSingleHashFile(KeyValuePair> variable, - string hashMatricole) - { - var studentHashSummaries = variable.Value; - var toWrite = JsonConvert.SerializeObject(studentHashSummaries, Culture.JsonSerializerSettings); - File.WriteAllText(hashMatricole + "/" + variable.Key + ".json", toWrite); - } - - private static void AddToDict(IDictionary dictionary, Ranking ranking, - StudentResult student, CourseTable? courseTable) - { - var id = student.Id; - if (string.IsNullOrEmpty(id)) - return; - - if (dictionary.TryGetValue(id, out var studentPresent)) - { - studentPresent.Merge(student, ranking, courseTable); - } - else - { - var studentHashSummary = new StudentHashSummary(); - studentHashSummary.Merge(student, ranking, courseTable); - dictionary[id] = studentHashSummary; + groupsDict.Add(groupId, groupIdsDict); } + + return groupsDict; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs index 54b4dd47..3e294db8 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs @@ -20,12 +20,14 @@ public OutputWriteUtil(ArgsConfig argsConfig) _config = argsConfig; } - public void SaveOutputs(RankingsSet? rankingsSet, DateFound dateFound) + public void SaveOutputs(RankingsSet rankingsSet, DateFound dateFound) { var outFolder = Path.Join(_config.DataFolder, Constants.OutputFolder); - IndexJsonBase.IndexesWrite(rankingsSet, outFolder, _config); - StatsJson.Write(outFolder, rankingsSet, _config); - HashMatricoleWrite.Write(rankingsSet, outFolder); + + rankingsSet.WriteAllRankings(outFolder, _config.ForceReparsing); + IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder); + StatsJson.From(rankingsSet).Write(outFolder, _config); + HashMatricoleWrite.From(rankingsSet).Write(outFolder); dateFound.WriteToFile(_config.DataFolder); } diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs index 77f088cd..52e783ef 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs @@ -11,7 +11,7 @@ namespace PoliNetwork.Graduatorie.Parser.Utils.Output; [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] -public class RankingSummaryStudent +public class RankingSummaryStudent : IEquatable { public readonly string? Course; public readonly string? Phase; @@ -40,6 +40,14 @@ public RankingSummaryStudent(string? phase, SchoolEnum? school, int? year, Ranki Url = url; } + public bool Equals(RankingSummaryStudent? other) + { + if (ReferenceEquals(null, other)) return false; + if (ReferenceEquals(this, other)) return true; + return Course == other.Course && Phase == other.Phase && School == other.School && Equals(Url, other.Url) && + Year == other.Year; + } + public int Compare(RankingSummaryStudent o) { var i = (Year ?? 0) - (o.Year ?? 0); @@ -70,14 +78,8 @@ public override bool Equals(object? obj) Year == rankingSummaryStudent.Year && equals && Course == rankingSummaryStudent.Course; } - protected bool Equals(RankingSummaryStudent other) - { - return Phase == other.Phase && School == other.School && Year == other.Year && Equals(Url, other.Url) && - Course == other.Course; - } - public override int GetHashCode() { - return HashCode.Combine(Phase, School, Year, Url, Course); + return HashCode.Combine(Course, Phase, School, Url, Year); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs index c16c963a..db7eb69f 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs @@ -31,25 +31,18 @@ public void Merge(StudentResult student, Ranking ranking, CourseTable? courseTab else { var s = courseTable.GetRankingSummaryStudent(ranking); - var present1 = SingleCourseJsons.Any(x => x.Equals(s)); - if (!present1) - SingleCourseJsons.Add(s); + var alreadyPresentJson = SingleCourseJsons.Any(x => x.Equals(s)); + if (!alreadyPresentJson) SingleCourseJsons.Add(s); } var r = ranking.GetRankingSummaryStudent(); - var present2 = RankingSummaries.Any(x => x.Equals(r)); - if (!present2) - RankingSummaries.Add(r); + var alreadyPresentSummary = RankingSummaries.Any(x => x.Equals(r)); + if (!alreadyPresentSummary) RankingSummaries.Add(r); } - public void Sort2() + public void Sort() { - RankingSummaries.Sort(Compare); - SingleCourseJsons.Sort(Compare); - } - - private int Compare(RankingSummaryStudent a, RankingSummaryStudent b) - { - return a.Compare(b); + RankingSummaries.Sort(); + SingleCourseJsons.Sort(); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs deleted file mode 100644 index 05c3404e..00000000 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs +++ /dev/null @@ -1,49 +0,0 @@ -#region - -using PoliNetwork.Graduatorie.Common.Utils; -using PoliNetwork.Graduatorie.Parser.Objects; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; - -#endregion - -namespace PoliNetwork.Graduatorie.Parser.Utils.Transformer.ParserNS; - -public static class Converter -{ - public static StudentResult FromMeritTableToStudentResult(MeritTableRow row) - { - var rowCanEnroll = row.CanEnroll ?? false; - var rowCanEnrollInto = rowCanEnroll ? row.CanEnrollInto : null; - return new StudentResult - { - Id = row.Id, - Ofa = row.Ofa, - Result = row.Result, - BirthDate = null, - PositionAbsolute = row.Position, - PositionCourse = null, - SectionsResults = null, - EnglishCorrectAnswers = null, - EnrollType = EnrollUtil.GetEnrollType(rowCanEnrollInto, rowCanEnroll) - }; - } - - - public static StudentResult FromCourseTableRowToStudentResult(CourseTableRow row, Table course) - { - var rowCanEnroll = row.CanEnroll ?? false; - return new StudentResult - { - Id = row.Id, - Ofa = row.Ofa, - Result = row.Result, - BirthDate = row.BirthDate, - EnrollType = EnrollUtil.GetEnrollType(course.CourseTitle, rowCanEnroll), - PositionAbsolute = null, - PositionCourse = row.Position, - SectionsResults = row.SectionsResults, - EnglishCorrectAnswers = row.EnglishCorrectAnswers - }; - } -} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index 106e9500..b986614d 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -8,7 +8,6 @@ using PoliNetwork.Graduatorie.Common.Extensions; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; -using PoliNetwork.Graduatorie.Common.Utils; using PoliNetwork.Graduatorie.Common.Utils.HashNS; using PoliNetwork.Graduatorie.Parser.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; @@ -446,7 +445,7 @@ private static List ParseMeritTable(Table> table) foreach (var row in table.Data) { - var id = HashMatricola.HashMatricolaMethod(Table.GetFieldByIndex(row, idIndex)); + var id = HashMatricola.Get(Table.GetFieldByIndex(row, idIndex)); var votoTest = Table.GetFieldByIndex(row, votoTestIndex) ?? "0"; var enrollCourse = Table.GetFieldByIndex(row, corsoIndex) ?? ""; var position = Table.GetFieldByIndex(row, posIndex) ?? "-1"; @@ -530,7 +529,7 @@ private static List ParseCourseTable(Table> table) SortedDictionary? sectionsIndex ) { - var id = HashMatricola.HashMatricolaMethod(Table.GetFieldByIndex(row, idIndex)); + var id = HashMatricola.Get(Table.GetFieldByIndex(row, idIndex)); var votoTestString = Table.GetFieldByIndex(row, votoTestIndex)?.Replace(",", ".") ?? "0"; var votoTest = Convert.ToDecimal(votoTestString, Culture.NumberFormatInfo); @@ -607,7 +606,7 @@ Table course Ofa = row.Ofa, Result = row.Result, BirthDate = row.BirthDate, - EnrollType = EnrollUtil.GetEnrollType(course.CourseTitle, canEnroll), + EnrollType = EnrollType.From(course.CourseTitle, canEnroll), PositionCourse = row.Position, SectionsResults = row.SectionsResults, EnglishCorrectAnswers = row.EnglishCorrectAnswers @@ -621,7 +620,7 @@ Table course return student; student.PositionAbsolute = meritRow.Position; - student.EnrollType = EnrollUtil.GetEnrollType(meritRow.CanEnrollInto, canEnroll); + student.EnrollType = EnrollType.From(meritRow.CanEnrollInto, canEnroll); return student; } @@ -644,7 +643,7 @@ IEnumerable courses var canEnroll = row.CanEnroll ?? false; var student = new StudentResult { - EnrollType = EnrollUtil.GetEnrollType(row.CanEnrollInto, canEnroll), + EnrollType = EnrollType.From(row.CanEnrollInto, canEnroll), Id = row.Id, PositionAbsolute = row.Position, Result = row.Result, diff --git a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs index d5462023..9a2b2771 100644 --- a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs @@ -27,21 +27,12 @@ public static List RankingsUrls(Metrics mt, ArgsConfig argsConfig) var rankingsUrls = mt.Execute(LinksFind.GetAll).ToList(); rankingsUrls = ScraperOutput.GetWithUrlsFromLocalFileLinks(rankingsUrls, argsConfig.DataFolder); - // save result - PrintAndWriteResults(rankingsUrls, argsConfig); + PrintLinks(rankingsUrls); + ScraperOutput.Write(rankingsUrls, argsConfig.DataFolder); // save results return rankingsUrls; } - private static void PrintAndWriteResults(List rankingsUrls, ArgsConfig argsConfig) - { - //write results to file - ScraperOutput.Write(rankingsUrls, argsConfig.DataFolder); - - //print links found - PrintLinks(rankingsUrls); - } - private static void PrintLinks(List rankingsUrls) { foreach (var r in rankingsUrls) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs index ab375a6c..c5dbdc8c 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs @@ -9,23 +9,6 @@ namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public static class CheckUrlUtil { - private static void CheckUrl(RankingUrl variable, HashSet final) - { - try - { - var x = UrlUtils.CheckUrl(variable); - if (!x) return; - lock (final) - { - final.Add(variable); - } - } - catch (Exception exception) - { - Console.WriteLine(exception); - } - } - public static HashSet GetRankingLinks(IEnumerable rankingsLinks) { var parallelQuery = rankingsLinks @@ -36,18 +19,35 @@ public static HashSet GetRankingLinks(IEnumerable rankingsLi return GetRankingLinksHashSet(parallelQuery); } - public static HashSet GetRankingLinksHashSet(IEnumerable parallelQuery) + public static HashSet GetRankingLinksHashSet(IEnumerable urls) { - var final = new HashSet(); + var hashSet = new HashSet(); - var action = parallelQuery.Select((Func)Selector).ToArray(); - Parallel.Invoke(action); + var actions = urls.Select((Func)Selector).ToArray(); + Parallel.Invoke(actions); - return final; + return hashSet; - Action Selector(RankingUrl variable) + Action Selector(RankingUrl url) { - return () => { CheckUrl(variable, final); }; + return () => { CheckUrl(url, hashSet); }; + } + } + + private static void CheckUrl(RankingUrl url, HashSet hashSet) + { + try + { + var x = UrlUtils.CheckUrl(url); + if (!x) return; + lock (hashSet) + { + hashSet.Add(url); + } + } + catch (Exception exception) + { + Console.WriteLine(exception); } } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs index 4c32c1a3..24ca314e 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs @@ -9,79 +9,44 @@ namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public static class ScraperOutput { - private static string GetFilePath(string? docFolder) - { - return docFolder + "/" + Constants.OutputLinksFilename; - } - - public static List GetWithUrlsFromLocalFileLinks(IEnumerable urls, string? dataFolder) + public static List GetWithUrlsFromLocalFileLinks(IEnumerable urls, string dataFolder) { var links = GetSaved(dataFolder); links.AddRange(urls); - return Distinct(links); + return links.DistinctBy(r => r.Url).ToList(); } - private static List Distinct(IEnumerable links) - { - var list = new List(); - var rankingUrls = links.Where(variable => list.All(x => x.Url != variable.Url)); - list.AddRange(rankingUrls); - return list; - } - - private static List GetSaved(string? dataFolder) + private static List GetSaved(string dataFolder) { List list = new(); var filePath = GetFilePath(dataFolder); if (!File.Exists(filePath)) return list; - var lines = GetLines(filePath); - if (lines == null) - { - // consider to handle them - Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); - return list; - } - + var urls = GetLines(filePath); try { - foreach (var variable in lines) RankingFromAdd(variable, list); + return urls.Select(RankingUrl.From).ToList(); } catch { // consider to handle them Console.WriteLine($"[ERROR] Can't validate the ScraperOutput file ({filePath})"); + return new List(); } - - return list; } - private static void RankingFromAdd(string variable, ICollection list) + private static IEnumerable GetLines(string filePath) { try { - var rankingUrl = RankingUrl.From(variable); - list.Add(rankingUrl); - } - catch (Exception exception) - { - Console.WriteLine(exception); - } - } - - private static List? GetLines(string filePath) - { - List? lines = null; - try - { - lines = File.ReadAllLines(filePath).Where(x => !string.IsNullOrEmpty(x)).ToList(); + return File.ReadAllLines(filePath).Where(x => !string.IsNullOrEmpty(x)).ToList(); } catch (Exception exception) { Console.WriteLine(exception); + Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); + return new List(); } - - return lines; } public static void Write(List rankingsUrls, string? dataFolder) @@ -118,4 +83,9 @@ private static string SelectorUrlWithEndLine(string url) { return url + "\n"; } + + private static string GetFilePath(string dataFolder) + { + return Path.Join(dataFolder, Constants.OutputLinksFilename); + } } \ No newline at end of file