# Patch summary (free text before the first "diff --git" header; patch tools skip it).
#
# What the hunks below do:
#   * Constants.cs: adds the LocationPlaceholder constant ("0").
#   * BySchoolYearCourseJson.cs: introduces the SchoolsDict / YearsDict / CoursesDict /
#     CourseDict aliases, filters rankings with a null School or Year before grouping,
#     rewrites GetYearsDict / GetCoursesDict on top of the aliases, and removes Parse,
#     IsSimilar, Comparison, CreateCourseJson and the RankingsAdd* / RankingAdd* /
#     AddToRankings / Similar helpers.
#   * SingleCourseJson.cs: adds the static factory From(Ranking, CourseTable?).
#   * CourseTable.cs: adds GetFixedLocation(), which returns Constants.LocationPlaceholder
#     when Location is null or empty.
#
# Review notes on the added code:
#   * The duplicate check changes from (Link, Location) to (Id, Location).
#   * The new GetCoursesDict does ContainsKey followed by an indexer / GetValueOrDefault
#     lookup; a single TryGetValue would avoid the double lookup and make the
#     UnreachableException guard unnecessary.
#   * locationList.Sort() runs after every insertion; sorting once per list would suffice.
#
# NOTE(review): this text was rebuilt from a whitespace-collapsed copy in which every
# "<...>" span had been stripped. Line counts agree with all hunk headers, but
# indentation, the wrap point of the SchoolsDict alias and some generic type arguments
# are inferred from context -- confirm against the repository before applying.

diff --git a/PoliNetwork.Graduatorie.Common/Data/Constants.cs b/PoliNetwork.Graduatorie.Common/Data/Constants.cs
index 121af72b..1edaff74 100644
--- a/PoliNetwork.Graduatorie.Common/Data/Constants.cs
+++ b/PoliNetwork.Graduatorie.Common/Data/Constants.cs
@@ -9,4 +9,5 @@ public static class Constants
     public const string HtmlFolder = "html";
     public const string OutputLinksFilename = "links.txt";
     public const string DataFolder = "data";
+    public const string LocationPlaceholder = "0";
 }
\ No newline at end of file
diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs
index f7f51b21..ae84a487 100644
--- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs
+++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs
@@ -1,17 +1,20 @@
 #region
-
+using System.Diagnostics;
 using Newtonsoft.Json;
 using Newtonsoft.Json.Serialization;
-using PoliNetwork.Graduatorie.Common.Data;
 using PoliNetwork.Graduatorie.Common.Enums;
-using PoliNetwork.Graduatorie.Common.Utils.ParallelNS;
 using PoliNetwork.Graduatorie.Parser.Objects.RankingNS;
-using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course;
-
 #endregion
 
 namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific;
 
+using SchoolsDict =
+    SortedDictionary<SchoolEnum, SortedDictionary<int,
+        SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>>;
+using YearsDict = SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>;
+using CoursesDict = SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>;
+using CourseDict = SortedDictionary<string, List<SingleCourseJson>>;
+
 [Serializable]
 [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))]
 public class BySchoolYearCourseJson : IndexJsonBase
@@ -19,10 +22,7 @@ public class BySchoolYearCourseJson : IndexJsonBase
     internal const string PathCustom = "bySchoolYearCourse.json";
 
     //keys: school, year, course, location
-    public SortedDictionary<
-        SchoolEnum,
-        SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>
-    > Schools = new();
+    public SchoolsDict Schools = new();
 
     public static BySchoolYearCourseJson? From(RankingsSet? set)
     {
@@ -30,15 +30,15 @@ public SortedDictionary<
             return null;
 
         var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate };
+
         // group rankings by school
-        var bySchool = set.Rankings.GroupBy(r => r.School);
+        var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value);
+
         foreach (var schoolGroup in bySchool)
         {
-            if (schoolGroup.Key is null)
-                continue;
-            var school = schoolGroup.Key.Value;
+            var school = schoolGroup.Key;
 
-            var byYears = schoolGroup.GroupBy(r => r.Year);
+            var byYears = schoolGroup.Where(r => r.Year != null).GroupBy(r => r.Year!.Value);
             var yearsDict = GetYearsDict(byYears);
 
             mainJson.Schools.Add(school, yearsDict);
@@ -48,249 +48,67 @@ public SortedDictionary<
         return mainJson;
     }
 
-    private static SortedDictionary<
-        int,
-        SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>
-    > GetYearsDict(IEnumerable<IGrouping<int?, Ranking>> byYears)
-    {
-        var d =
-            new SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>();
-
-        foreach (var yearGroup in byYears) GetYearsDictSingle(yearGroup, d);
-
-        return d;
-    }
-
-    private static void GetYearsDictSingle(IGrouping<int?, Ranking> yearGroup,
-        IDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>> d)
+    private static YearsDict GetYearsDict(IEnumerable<IGrouping<int, Ranking>> byYears)
     {
-        if (yearGroup.Key != null) d.Add(yearGroup.Key.Value, GetCoursesDict(yearGroup));
-    }
-
-    private static SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>> GetCoursesDict(
-        IEnumerable<Ranking> yearGroup
-    )
-    {
-        var d = new SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>();
+        var yearsDict = new YearsDict();
 
-        foreach (var ranking in yearGroup)
+        foreach (var yearGroup in byYears)
         {
-            if (ranking.ByCourse == null)
-                continue;
-
-            var byTitle = ranking.ByCourse.GroupBy(c => c.Title);
-            foreach (var courseGroup in byTitle)
-                AddCourseToDict(d, ranking, courseGroup);
+            var coursesDict = GetCoursesDict(yearGroup);
+            yearsDict.Add(yearGroup.Key, coursesDict);
         }
 
-        return d;
+        return yearsDict;
     }
 
-    private static void AddCourseToDict(
-        IDictionary<string, SortedDictionary<string, List<SingleCourseJson>>> d,
-        Ranking ranking,
-        IGrouping<string?, CourseTable> courseGroup
-    )
+    private static CoursesDict GetCoursesDict(IGrouping<int, Ranking> yearGroup)
     {
-        var title = courseGroup.Key;
-        if (string.IsNullOrEmpty(title))
-            return;
-
-        if (!d.ContainsKey(title))
-            d[title] = new SortedDictionary<string, List<SingleCourseJson>>();
+        var coursesDict = new CoursesDict();
 
-        var courseDict = d[title];
-        foreach (var course in courseGroup)
+        foreach (var ranking in yearGroup)
         {
-            var location = course.Location;
-
-            // fixedLocation
-            // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello
-            // deve comparire nella lista
-            // fix: se un corso non ha location, si inserisce un valore 0
-            var fixedLocation = string.IsNullOrEmpty(location) ? "0" : location;
-
-            if (!courseDict.ContainsKey(fixedLocation))
-                courseDict[fixedLocation] = new List<SingleCourseJson>();
-
-            var locationDict = courseDict[fixedLocation];
-            var singleCourseJson = CreateCourseJson(ranking, course);
-
-            if (locationDict.Any(IsThisCourse))
+            if (ranking.ByCourse == null)
                 continue;
 
-            locationDict.Add(singleCourseJson);
-            locationDict.Sort(Comparison);
-            continue;
+            var byTitle =
+                ranking.ByCourse.Where(c => c.Title != null).GroupBy(c => c.Title!); // e.g. INGEGNERIA AEROSPAZIALE
 
-            bool IsThisCourse(SingleCourseJson x)
-            {
-                return x.Link == singleCourseJson.Link && x.Location == singleCourseJson.Location;
-            }
-        }
-    }
-
-    private static int Comparison(SingleCourseJson x, SingleCourseJson y)
-    {
-        return x.CompareTo(y);
-    }
-
-    private static SingleCourseJson CreateCourseJson(Ranking ranking, CourseTable course)
-    {
-        var basePath = ranking.School + "/" + ranking.Year + "/";
-        return new SingleCourseJson
-        {
-            Link = ranking.GetFilename(),
-            Id = ranking.GetId(),
-            BasePath = basePath,
-            Year = ranking.Year,
-            School = ranking.School,
-            Location = course.Location,
-            RankingOrder = ranking.RankingOrder
-        };
-    }
-
-    private static bool IsSimilar(IEnumerable<Ranking> yearGroup, SingleCourseJson singleCourseJson)
-    {
-        var enumerable = yearGroup.Where(v1 => v1.ByCourse != null);
-
-        return enumerable.Any(Predicate);
-
-        bool Predicate(Ranking v1)
-        {
-            return singleCourseJson.School == v1.School
-                   && singleCourseJson.Year == v1.Year
-                   && singleCourseJson.RankingOrder?.GetId() == v1.RankingOrder?.GetId();
-        }
-    }
-
-    public static RankingsSet? Parse(string dataFolder)
-    {
-        var outFolder = Path.Join(dataFolder, Constants.OutputFolder);
-        var mainJsonPath = Path.Join(outFolder, PathCustom);
-        try
-        {
-            var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson<BySchoolYearCourseJson>(mainJsonPath);
-            if (mainJson is null)
-                return null;
-
-            var rankings = RankingsAdd(mainJson, outFolder);
-
-            return new RankingsSet { LastUpdate = mainJson.LastUpdate, Rankings = rankings };
-        }
-        catch
-        {
-            // ignored
-        }
-
-        return null;
-    }
-
-    private static List<Ranking> RankingsAdd(BySchoolYearCourseJson mainJson, string outFolder)
-    {
-        List<Ranking> rankings = new();
-        foreach (var school in mainJson.Schools)
-        foreach (var year in school.Value)
-            RankingsAddSingleYearSchool(year, outFolder, school, rankings);
-
-        return rankings;
-    }
-
-    private static void RankingsAddSingleYearSchool(
-        KeyValuePair<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>> year,
-        string outFolder,
-        KeyValuePair<
-            SchoolEnum,
-            SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>
-        > school,
-        ICollection<Ranking> rankings
-    )
-    {
-        var actions = new List<Action>();
-        foreach (var filename in year.Value)
-        {
-            var collection = filename.Value.Select(Selector);
-            actions.AddRange(collection);
-            continue;
-
-            Action Selector(KeyValuePair<string, List<SingleCourseJson>> variable)
+            foreach (var courseGroup in byTitle)
             {
-                return () => { RankingAdd(school, year, outFolder, variable, rankings); };
+                var alreadyExisted = coursesDict.ContainsKey(courseGroup.Key);
+                var courseDict = alreadyExisted
+                    ? coursesDict[courseGroup.Key]
+                    : new CourseDict();
+
+                foreach (var courseTable in courseGroup)
+                {
+                    var location = courseTable.GetFixedLocation();
+                    if (!courseDict.ContainsKey(location))
+                    {
+                        // first time this location is encountered,
+                        // so we instantiate the list for this location
+                        var newLocationList = new List<SingleCourseJson>();
+                        courseDict.Add(location, newLocationList);
+                    }
+
+                    var locationList = courseDict.GetValueOrDefault(location);
+                    if (locationList == null)
+                        throw new UnreachableException(); // this should never happen at this point
+
+                    var singleCourseJson = SingleCourseJson.From(ranking, courseTable);
+
+                    if (locationList.Any(
+                            x => x.Id == singleCourseJson.Id && x.Location == singleCourseJson.Location))
+                        continue;
+
+                    locationList.Add(singleCourseJson);
+                    locationList.Sort();
+                }
+
+                if (!alreadyExisted) coursesDict.Add(courseGroup.Key, courseDict);
             }
         }
 
-        ParallelRun.Run(actions.ToArray());
-    }
-
-    private static void RankingAdd(
-        KeyValuePair<
-            SchoolEnum,
-            SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>
-        > school,
-        KeyValuePair<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>> year,
-        string outFolder,
-        KeyValuePair<string, List<SingleCourseJson>> filename,
-        ICollection<Ranking> rankings
-    )
-    {
-        foreach (var variable in filename.Value)
-            RankingAddSingle(school, year, outFolder, rankings, variable);
-    }
-
-    private static void RankingAddSingle(
-        KeyValuePair<
-            SchoolEnum,
-            SortedDictionary<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>>
-        > school,
-        KeyValuePair<int, SortedDictionary<string, SortedDictionary<string, List<SingleCourseJson>>>> year,
-        string outFolder,
-        ICollection<Ranking> rankings,
-        SingleCourseJson variable
-    )
-    {
-        var schoolKey = school.Key.ToString();
-        var yearKey = year.Key.ToString();
-        var path = Path.Join(outFolder, schoolKey, yearKey, variable.Link);
-        var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path);
-        if (ranking == null)
-            return;
-
-        lock (rankings)
-        {
-            AddToRankings(rankings, ranking);
-        }
-    }
-
-    private static void AddToRankings(ICollection<Ranking> rankings, Ranking ranking)
-    {
-        var any = rankings.Any(
-            x =>
-                x.School == ranking.School
-                && x.Year == ranking.Year
-                && Similar(x.ByCourse, ranking.ByCourse)
-        );
-
-        if (!any)
-            rankings.Add(ranking);
-    }
-
-    private static bool Similar(
-        IReadOnlyCollection<CourseTable>? a,
-        IReadOnlyCollection<CourseTable>? b
-    )
-    {
-        if (a == null || b == null)
-            return false;
-        return a.Count == b.Count && a.Select(Selector).All(Predicate);
-
-        bool Selector(CourseTable variable)
-        {
-            return b.Any(x => x.Title == variable.Title);
-        }
-
-        bool Predicate(bool boolB)
-        {
-            return boolB;
-        }
+        return coursesDict;
     }
 }
\ No newline at end of file
diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs
index bdb25dc1..0a6d1b72 100644
--- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs
+++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs
@@ -22,6 +22,21 @@ public class SingleCourseJson: IComparable<SingleCourseJson>
     public SchoolEnum? School;
     public int? Year;
 
+    public static SingleCourseJson From(Ranking ranking, CourseTable? course)
+    {
+        var basePath = $"{ranking.School}/{ranking.Year}/"; // "Ingegneria/2023"
+        return new SingleCourseJson
+        {
+            Link = ranking.GetFilename(),
+            Id = ranking.GetId(),
+            BasePath = basePath,
+            Year = ranking.Year,
+            School = ranking.School,
+            Location = course?.Location,
+            RankingOrder = ranking.RankingOrder
+        };
+    }
+
     public int GetHashWithoutLastUpdate()
     {
         var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode();
diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs
index d9ef3c26..17aac09b 100644
--- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs
+++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs
@@ -2,6 +2,7 @@
 
 using Newtonsoft.Json;
 using Newtonsoft.Json.Serialization;
+using PoliNetwork.Graduatorie.Common.Data;
 using PoliNetwork.Graduatorie.Parser.Objects.RankingNS;
 using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit;
 using PoliNetwork.Graduatorie.Parser.Utils.Output;
@@ -28,4 +29,19 @@ public RankingSummaryStudent GetRankingSummaryStudent(Ranking ranking)
         return new RankingSummaryStudent(Title, ranking.RankingOrder?.Phase, ranking.School, ranking.Url,
             ranking.Year);
     }
+
+    /// <summary>
+    ///     Get the course location if present, otherwise get the placeholder (constant).
+    ///     Useful for index purposes.
+    /// </summary>
+    /// <returns>A string with the location or the placeholder</returns>
+    public string GetFixedLocation()
+    {
+        // fixedLocation
+        // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello
+        // deve comparire nella lista
+        // fix: se un corso non ha location, si inserisce un valore 0
+        if (string.IsNullOrEmpty(Location)) return Constants.LocationPlaceholder;
+        return Location;
+    }
 }
\ No newline at end of file