From 802775b036db18349d3d88d68cb5e49cfa4ff457 Mon Sep 17 00:00:00 2001 From: jakoble <37188634+jakoble@users.noreply.github.com> Date: Tue, 26 Sep 2023 12:04:18 +0200 Subject: [PATCH] Update 1-Parliamentary corpora in the CLARIN infrastructure.csv --- .../1-Parliamentary corpora in the CLARIN infrastructure.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource_families/Corpora/Parliamentary corpora/1-Parliamentary corpora in the CLARIN infrastructure.csv b/resource_families/Corpora/Parliamentary corpora/1-Parliamentary corpora in the CLARIN infrastructure.csv index c11a8ce..93ed69a 100644 --- a/resource_families/Corpora/Parliamentary corpora/1-Parliamentary corpora in the CLARIN infrastructure.csv +++ b/resource_families/Corpora/Parliamentary corpora/1-Parliamentary corpora in the CLARIN infrastructure.csv @@ -33,7 +33,7 @@ The corpus is available for download from the Language Bank of Finland. "Norwegian Parliamentary Speech Corpus";"https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-58/";"Norwegian";"140 hours; 65,000 sentences; 1.2 million words";;"CC-ZERO";;"This corpus consists of audio recordings of meetings in Stortinget (the Norwegian parliament), and corresponding orthographic transcriptions in either Norwegian Bokmål or Norwegian Nynorsk, as well as various metadata about the speakers. The official proceedings from the meetings are also included in the corpus for reference.#SEP Transcription was first done automatically; subsequently, the output of the automatic process was manually checked and corrected by trained linguists and philologists. Finally, all transcriptions were proofread to ensure consistency and accuracy. 
The audio files in the corpus contain the speech of entire days of plenary meetings from 2017 and 2018 (or, if a meeting lasts more than six hours, the first six hours of a day).#SEPThe corpus is available for download from the Norwegian Language Bank.";"Parliamentary corpora";"Download";"https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-58/";"Solberg and Ortiz (2022)";"https://doi.org/10.48550/arXiv.2201.10881"; "Proceedings of Norwegian Parliamentary Debates";"http://hdl.handle.net/11495/DA65-D02F-0EB0-9";"Norwegian";"29 million tokens";"tokenised, sentence segmentation, speaker metadata (name, party, time, type of utterance)";"NLOD";;"The corpus contains Norwegian parliamentary debates from 2008 to 2015. #SEPThe corpus is available through the concordancer Corpuscle.";"Parliamentary corpora";"Concordancer";"http://hdl.handle.net/11495/DA65-D02F-0EB0-9";;; "Talk of Norway";"http://hdl.handle.net/11509/123";"Norwegian";"63.8 million tokens";"tokenised, PoS-tagged, lemmatised";"NLOD";;"The corpus contains Norwegian parliamentary debates from 1998 to 2016. #SEPThe corpus is available for download from the CLARINO repository.";"Parliamentary corpora";"Download";"http://hdl.handle.net/11509/123";"Lapponi et al. (2018)";"https://www.clarin.eu/resource-families/parliamentary-corpora#Lapponi%20et%20al.%202018"; -"Polish Parliamentary Corpus";"http://hdl.handle.net/11321/467";"Polish";"300 million tokens";"tokenised, MSD-tagged, named entities, etc.";;;"The corpus contains Polish parliamentary debates from 1991 to 2017. It is annotated with Morfeusz SGJP (morphological analyser), Pantera (disambiguating tagger), Spejd (shallow parser), Nerf (named entity recognizer). #SEPThe corpus is available for download from a dedicated webpage and through the concordancer NKJP. 
";"Parliamentary corpora";"Concordancer#SEPDownload";"http://sejm.nlp.ipipan.waw.pl/#SEPhttp://clip.ipipan.waw.pl/PSC";"Ogrodniczuk (2012)#SEPOgrodniczuk (2018)";"https://www.clarin.eu/resource-families/parliamentary-corpora#Ogrodniczuk%202012#SEPhttps://www.clarin.eu/resource-families/parliamentary-corpora#Ogrodniczuk%202018"; +"Polish Parliamentary Corpus";"http://hdl.handle.net/11321/467";"Polish";"300 million tokens";"tokenised, MSD-tagged, named entities, etc.";;;"The corpus contains Polish parliamentary debates from 1991 to 2017. It is annotated with Morfeusz SGJP (morphological analyser), Pantera (disambiguating tagger), Spejd (shallow parser), Nerf (named entity recognizer). #SEPThe corpus is available for download from a dedicated webpage and through the concordancer NKJP. ";"Parliamentary corpora";"Concordancer#SEPDownload";"http://sejm.nlp.ipipan.waw.pl/#SEPhttp://clip.ipipan.waw.pl/PSC";"Ogrodniczuk (2018)";"http://lrec-conf.org/workshops/lrec2018/W2/pdf/11_W2.pdf"; "PTPARL Corpus";"https://hdl.handle.net/21.11129/0000-000B-D33C-4";"Portuguese";"1 million tokens";"tokenised, PoS-tagged, lemmatised";"CLARIN RES";;"The corpus contains Portuguese parliamentary debates from 1970 to 2008. It is annotated with LX-Tokenizer, LX-Tagger, MBT, MBLEM (lemmatisation). #SEPThe corpus is available for download from the CLARIN PORTUGAL repository.";"Parliamentary corpora";"Download";"https://hdl.handle.net/21.11129/0000-000B-D33C-4";"Généreux et al. (2012)";"https://www.clarin.eu/resource-families/parliamentary-corpora#G%C3%A9n%C3%A9reux%20et%20al.%202012"; "Slovenian parliamentary corpus ParlaMeter-sl 1.0";"http://hdl.handle.net/11356/1208";"Slovenian";"41 million tokens";"tokenised, MSD-tagged, lemmatised, named entities";"CC-BY";;"The corpus contains minutes of the National Assembly of the Republic of Slovenia and currently covers the VIIth mandate from 1 August 2014 to 22 June 2018. 
The corpus contains speaker metadata (gender, age, education, party affiliation).#SEPThe corpus is available for download from the CLARIN.SI repository and through the concordancers KonText and noSketchEngine, as well as through a dedicated webpage.";"Parliamentary corpora";"Concordancer#SEPDownload";"https://www.clarin.si/kontext/first_form?corpname=parlameter_sl#SEPhttp://hdl.handle.net/11356/1167";"Ljubešić et al. (2018)";"https://www.clarin.eu/resource-families/parliamentary-corpora#Ljube%C5%A1i%C4%87%20et%20al.%202018"; "Slovenian parliamentary corpus siParl 3.0";"http://hdl.handle.net/11356/1748";"Slovenian";"213 million words";"tokenised, PoS-tagged, lemmatised";"CC-BY";;"The corpus contains Slovenian parliamentary debates from 1990 to 2022. It differs from the SlovParl 2.0 corpus (listed below) in that it contains only basic meta-data about the speakers, a typology of sessions and structural and editorial annotations. #SEPThe corpus is available for download from the CLARIN.SI repository and through the concordancers KonText and noSketchEngine.";"Parliamentary corpora";"Concordancer#SEPDownload";"https://www.clarin.si/noske/run.cgi/corp_info?corpname=siparl30&struct_attr_stats=1#SEPhttp://hdl.handle.net/11356/1748";;;