diff --git a/PoliNetwork.Graduatorie.Common/Enums/SchoolEnum.cs b/PoliNetwork.Graduatorie.Common/Enums/SchoolEnum.cs index 66b6eced..12bb8d69 100644 --- a/PoliNetwork.Graduatorie.Common/Enums/SchoolEnum.cs +++ b/PoliNetwork.Graduatorie.Common/Enums/SchoolEnum.cs @@ -28,7 +28,7 @@ public static string ToShortName(this SchoolEnum s) SchoolEnum.Design => "DES", SchoolEnum.Ingegneria => "ENG", SchoolEnum.Urbanistica => "URB", - _ => "UNK", + _ => "UNK" }; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index 599b87ca..525b664c 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -8,7 +8,6 @@ using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; -using PoliNetwork.Graduatorie.Parser.Utils; #endregion @@ -92,20 +91,20 @@ private static bool SameHashCourse(IReadOnlyCollection? aTableCours var aHash = aTableCourse.Select(variable => { - var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); return hashWithoutLastUpdate; }).ToList(); var bHash = bTableCourse.Select(variable => { - var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); return hashWithoutLastUpdate; }).ToList(); - var ai = Hashing.GetHashFromListHash(aHash); - var bi = Hashing.GetHashFromListHash(bHash); + var ai = aHash; + var bi = bHash; - return (ai ?? 0) == (bi ?? 0); + return ai == bi; } private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit) @@ -117,9 +116,7 @@ private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMer var ai = aTableMerit.GetHashWithoutLastUpdate(); var bi = bTableMerit.GetHashWithoutLastUpdate(); - var aii = Hashing.GetHashFromListHash(ai) ?? 0; - var bii = Hashing.GetHashFromListHash(bi) ?? 0; - return aii == bii; + return ai == bi; } private static Ranking? GetRankingFromFile(string path) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index bdb25dc1..dcff36b8 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -12,31 +12,20 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json; [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] -public class SingleCourseJson: IComparable +public class SingleCourseJson : IComparable { public string? BasePath; + public string? Id; public string? Link; public string? Location; - public string? Id; public RankingOrder? RankingOrder; public SchoolEnum? School; public int? Year; - public int GetHashWithoutLastUpdate() - { - var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); - var hashCode = Id?.GetHashCode() ?? "Id".GetHashCode(); - var basePathInt = BasePath?.GetHashCode() ?? "BasePath".GetHashCode(); - var yearInt = Year?.GetHashCode() ?? "Year".GetHashCode(); - var schoolInt = School?.GetHashCode() ?? "School".GetHashCode(); - var code = "SingleCourseJson".GetHashCode(); - return hashWithoutLastUpdate ^ hashCode ^ basePathInt ^ yearInt ^ schoolInt ^ code; - } - public int CompareTo(SingleCourseJson? singleCourseJson) { if (singleCourseJson == null) return 1; - + if (Year != singleCourseJson.Year) return (Year ?? -1) < (singleCourseJson.Year ?? -1) ? -1 : 1; @@ -57,6 +46,17 @@ public int CompareTo(SingleCourseJson? singleCourseJson) return 0; } + public int GetHashWithoutLastUpdate() + { + var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); + var hashCode = Id?.GetHashCode() ?? "Id".GetHashCode(); + var basePathInt = BasePath?.GetHashCode() ?? "BasePath".GetHashCode(); + var yearInt = Year?.GetHashCode() ?? "Year".GetHashCode(); + var schoolInt = School?.GetHashCode() ?? "School".GetHashCode(); + var code = "SingleCourseJson".GetHashCode(); + return hashWithoutLastUpdate ^ hashCode ^ basePathInt ^ yearInt ^ schoolInt ^ code; + } + public bool Is(CourseTable courseTable) { return (RankingOrder?.Phase ?? "") == courseTable.Title; diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index 4a4fad20..ddea20f2 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -9,7 +9,6 @@ using PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; -using PoliNetwork.Graduatorie.Parser.Utils; using PoliNetwork.Graduatorie.Parser.Utils.Output; #endregion @@ -30,19 +29,19 @@ public class Ranking : IComparable public RankingUrl? Url; public int? Year; - public RankingSummaryStudent GetRankingSummaryStudent() - { - return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); - } - public int CompareTo(Ranking? other) { if (ReferenceEquals(this, other)) return 0; if (ReferenceEquals(null, other)) return 1; - + return string.Compare(GetId(), other.GetId(), StringComparison.Ordinal); } + public RankingSummaryStudent GetRankingSummaryStudent() + { + return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); + } + /*** * Ottieni l'hash senza considerare il valore di LastUpdate @@ -57,7 +56,7 @@ public int GetHashWithoutLastUpdate() i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); var iMerit = ByMerit?.GetHashWithoutLastUpdate(); - i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode(); + i ^= iMerit ?? "ByMerit".GetHashCode(); if (ByCourse == null) @@ -66,7 +65,7 @@ public int GetHashWithoutLastUpdate() i = ByCourse.Aggregate(i, (current, variable) => { var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode(); + var iList = hashWithoutLastUpdate; return current ^ iList; }); @@ -113,7 +112,7 @@ public string GetFilename() public string GetId() { var idList = new List(); - + var schoolShort = School?.ToShortName(); if (schoolShort != null) idList.Add(schoolShort); @@ -122,7 +121,7 @@ public string GetId() var orderId = RankingOrder?.GetId(); if (orderId != null) idList.Add(orderId); - + var fallback = DateTime.UtcNow.ToString("yyyyMMddTHHmmss", CultureInfo.InvariantCulture) + "Z"; if (idList.Count == 0) idList.Add(fallback); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs index e7407401..561908ca 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs @@ -11,6 +11,9 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class RankingOrder { + public bool IsAnticipata; // used for DES/URB rankings until 2023 + public bool IsEnglish; + public bool IsExtraEu; public string? Phase; // the original string (e.g. " //esempio: @@ -18,15 +21,12 @@ public class RankingOrder //prima graduatoria di seconda fase:{primary:2, secondary:1} public int? Primary; public int? Secondary; - public bool IsAnticipata; // used for DES/URB rankings until 2023 - public bool IsExtraEu; - public bool IsEnglish; public RankingOrder(string phase, bool isExtraEu = false, bool isEnglish = false) { Phase = phase; ParsePhaseString(phase); - + IsExtraEu = isExtraEu; IsEnglish = isEnglish; } @@ -35,12 +35,12 @@ private void ParsePhaseString(string phase) { var s = phase.ToUpper().Trim(); if (string.IsNullOrEmpty(s)) return; - + var strings = s.Split(" "); - + IsAnticipata = s.Contains("ANTICIPATA"); if (IsAnticipata) return; - + Primary = ExtractPhaseNumberByKey(strings, "FASE"); Secondary = ExtractPhaseNumberByKey(strings, "GRADUATORIA"); } @@ -74,22 +74,19 @@ private void ParsePhaseString(string phase) public string GetId() { var idList = new List(); - if (IsAnticipata) idList.Add($"anticipata"); + if (IsAnticipata) idList.Add("anticipata"); if (Primary != null) idList.Add($"{Primary}fase"); if (Secondary != null) idList.Add($"{Secondary}grad"); - + var cleanPhase = Phase?.Replace("_", "").Replace("-", "").Replace(" ", "_").ToLower() ?? ""; - var noOrder = IsAnticipata == false && Primary == null && Secondary == null; + var noOrder = IsAnticipata == false && Primary == null && Secondary == null; var isSingleExtraEu = noOrder && cleanPhase.Contains("extraue"); - if (noOrder) - { - idList.Add(isSingleExtraEu ? "extraeu" : cleanPhase); - } - + if (noOrder) idList.Add(isSingleExtraEu ? "extraeu" : cleanPhase); + idList.Add(IsEnglish ? "eng" : "ita"); if (IsExtraEu && !isSingleExtraEu) idList.Add("extraeu"); // the second condition is to avoid double extraeu - + var id = string.Join("_", idList); return id; } @@ -111,4 +108,4 @@ public void Merge(RankingOrder? rankingRankingOrder) Primary ??= rankingRankingOrder?.Primary; Secondary ??= rankingRankingOrder?.Secondary; } -} +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs b/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs index 267e800d..37d69a09 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs @@ -3,6 +3,7 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using PoliNetwork.Graduatorie.Common.Objects; +using PoliNetwork.Graduatorie.Parser.Utils; #endregion @@ -22,7 +23,7 @@ public class StudentResult public decimal? Result; public SortedDictionary? SectionsResults; - public List GetHashWithoutLastUpdate() + public int GetHashWithoutLastUpdate() { var r = new List { @@ -47,6 +48,6 @@ public class StudentResult r.Add(SectionsResults.Aggregate("SectionsResultsFull".GetHashCode(), (current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode())); - return r; + return Hashing.GetHashFromListHash(r); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs index a796b4c5..1add9a7d 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs @@ -17,7 +17,7 @@ public class MeritTable public List? Rows; public int? Year; - public List GetHashWithoutLastUpdate() + public int GetHashWithoutLastUpdate() { var r = new List { "MeritTable".GetHashCode() }; if (Headers != null) @@ -30,14 +30,13 @@ public class MeritTable r.Add(Rows.Aggregate("RowsFull".GetHashCode(), (current, variable) => { var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var hashFromListHash = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty3".GetHashCode(); - return current ^ hashFromListHash; + return current ^ hashWithoutLastUpdate; })); else r.Add("RowsEmpty".GetHashCode()); r.Add(Year?.GetHashCode() ?? "Year".GetHashCode()); r.Add(Path?.GetHashCode() ?? "Path".GetHashCode()); - return r; + return Hashing.GetHashFromListHash(r); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs b/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs index e71a7ceb..b5eb024f 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs @@ -2,13 +2,14 @@ namespace PoliNetwork.Graduatorie.Parser.Utils; public static class Hashing { - public static int? GetHashFromListHash(IReadOnlyCollection? iMerit) + public static int GetHashFromListHash(IReadOnlyCollection? iMerit) { if (iMerit == null) - return null; + return 0; if (iMerit.Count == 0) - return null; + return 0; - return iMerit.Aggregate(0, (current, variable) => current ^ variable ?? 0); + var hashFromListHash = iMerit.Aggregate(0, (current, variable) => current ^ variable ?? 0); + return hashFromListHash; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index 781c9e69..106e9500 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -208,26 +208,27 @@ private RankingsSet ParseNewRankings(IReadOnlyCollection htmls) var extraEuStr = intestazioni[4].Split("\n")[0].ToLower(); var isExtraEu = extraEuStr.Contains("extra-ue"); - if (ranking.Year < 2024) { + if (ranking.Year < 2024) + { // layout valid until 2023 var phase = string.Join(" ", intestazioni[3].Split(" - ")[1..]); ranking.RankingOrder = new RankingOrder(phase, isExtraEu); if (ranking.School == SchoolEnum.Architettura && ranking.RankingOrder.Primary == null && ranking.RankingOrder.Secondary == null && ranking.RankingOrder.IsExtraEu) - { // this is a fallback for 2020-2023: // POLIMI was used to add the ranking number (Secondary, e.g. "Prima Graduatoria") for ExtraEU starting // from the second ranking. // e.g. Extra-EU first ranking => phase = "Extra-ue", // Extra-EU second ranking => phase = "Extra-ue - Seconda Graduatoria" // so this is a fallback to add the equivalent of "Prima Graduatoria" to the first ExtraEU ranking. - ranking.RankingOrder.Secondary = 1; - } - } else { + } + else + { // layout valid since 2024 (if the layout changes again, make another else if) var phase = intestazioni[3]; - var isEnglish = intestazioni[2].Contains("taught in english") || intestazioni[2].Contains("erogati in inglese"); + var isEnglish = intestazioni[2].Contains("taught in english") || + intestazioni[2].Contains("erogati in inglese"); ranking.RankingOrder = new RankingOrder(phase, isExtraEu, isEnglish); } @@ -774,4 +775,4 @@ private IEnumerable ParseLocalHtmlFiles() ); return obj2; } -} +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/Scraper.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/Scraper.cs index 2dc91e58..f13f0154 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/Scraper.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/Scraper.cs @@ -40,7 +40,8 @@ private IEnumerable ScrapeAvvisiFuturiStudenti() var page = _web.Load(AvvisiFuturiStudentiUrl).DocumentNode; var newsCards = - page.SelectNodes("//div[contains(@class, 'news')]//div[contains(@class, 'row--card')]//div[contains(@class, 'card__content')]"); + page.SelectNodes( + "//div[contains(@class, 'news')]//div[contains(@class, 'row--card')]//div[contains(@class, 'card__content')]"); if (newsCards == null) return links; foreach (var card in newsCards) @@ -48,7 +49,7 @@ private IEnumerable ScrapeAvvisiFuturiStudenti() var title = card.Descendants("h5").First(); var titleValid = title != null && IsValidText(title.InnerText); - var body = card.Descendants("p").Where(el => el.ParentNode.HasClass("news-bodytext")).First(); + var body = card.Descendants("p").First(el => el.ParentNode.HasClass("news-bodytext")); var bodyValid = body != null && IsValidText(body.InnerText); var aTag = card.Descendants("a").First(); @@ -134,4 +135,4 @@ private bool IsValidText(string text) return null; } } -} +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs index fcf0d953..30bffc81 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs @@ -40,4 +40,4 @@ public static bool CheckUrl(RankingUrl? url) return false; } } -} +} \ No newline at end of file