-
Notifications
You must be signed in to change notification settings - Fork 3
/
Pirinen-2009-fsmnlp.html
1524 lines (1372 loc) · 108 KB
/
Pirinen-2009-fsmnlp.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html><html lang="en">
<head>
<title>Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009.</title>
<!--Generated on Fri Oct 13 18:33:09 2017 by LaTeXML (version 0.8.2) http://dlmf.nist.gov/LaTeXML/.-->
<!--Document created on Last modification: October 13, 2017.-->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="../latexml/LaTeXML.css" type="text/css">
<link rel="stylesheet" href="../latexml/ltx-article.css" type="text/css">
</head>
<body>
<div class="ltx_page_main">
<div class="ltx_page_content">
<article class="ltx_document ltx_authors_1line">
<h1 class="ltx_title ltx_title_document">Weighting Finite-State Morphological Analyzers
using <span class="ltx_text ltx_font_smallcaps">HFST</span> Tools
<span class="ltx_note ltx_role_footnote"><sup class="ltx_note_mark">1</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">1</sup>The official publication was in the Proceedings of
FSMNLP 2009.</span></span></span>
</h1>
<div class="ltx_authors">
<span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Krister Lindén
</span></span>
<span class="ltx_author_before"> </span><span class="ltx_creator ltx_role_author">
<span class="ltx_personname">Tommi Pirinen
<br class="ltx_break">University of Helsinki
<br class="ltx_break">Helsinki, Finland
<br class="ltx_break">{krister.linden,tommi.pirinen}@helsinki.fi
<br class="ltx_break">
</span></span>
</div>
<div class="ltx_date ltx_role_creation">Last modification: October 13, 2017</div>
<div class="ltx_abstract">
<h6 class="ltx_title ltx_title_abstract">Abstract</h6>
<p class="ltx_p">In a language with very productive compounding and a rich
inflectional system, e.g. Finnish, new words are to a large extent
formed by compounding. In order to disambiguate between the possible
compound segmentations, a probabilistic strategy has been found
effective by Lindén and Pirinen <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib7" title="" class="ltx_ref">7</a>]</cite>. In this
article, we present a method for implementing the probabilistic
framework as a separate process which can be combined through
composition with a lexical transducer to create a weighted
morphological analyzer. To implement the analyzer, we use the
<span class="ltx_text ltx_font_smallcaps">HFST-LexC</span> and related command line tools which are part of
the open source <em class="ltx_emph">Helsinki Finite-State Technology</em> package.
Using Finnish as a test language, we show how to use the weighted
finite-state lexicon for building a simple unigram tagger with 97 %
precision for Finnish words and word segments belonging to the
vocabulary of the lexicon.
</p>
</div>
<section id="S1" class="ltx_section">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">1 </span>Introduction</h2>
<div id="S1.p1" class="ltx_para">
<p class="ltx_p">In English the received wisdom is that traditional morphological
analysis is too complex for statistical taggers to deal with; a
simplified tagging scheme is needed. The disambiguation accuracy will
otherwise be too low even with an n-gram tagger because there is not
enough training material. However, currently training material for
morphological disambiguators is abundantly available. At the same
time, one could argue that the interest in tagging has disappeared,
because we can do more complex things such as syntactic dependency
analysis and get the morphological disambiguation as a side effect. As
a matter of curiosity, we will still pursue statistical tagging,
because there is also the initial result often attributed to Ken
Church that approximately 90 % of the readings in English will be
correct if one simply gives each word its most frequent
morphosyntactic tag. We wish to derive a similar baseline for Finnish.</p>
</div>
<div id="S1.p2" class="ltx_para">
<p class="ltx_p">In addition, a morphologically complex language like Finnish is
different from English. In English there are hardly any inflectional
endings and applying traditional morphological analysis to English
necessarily creates massive ambiguity that can only be resolved by
context, whereas morphologically complex languages like Finnish in
each word most often carry the morphemes referred to by the
morphological tags. As the morphological tags have a physical
correspondence in the strings, it should be possible to use much less
context, or perhaps none at all, to disambiguate the traditional
morphological analysis of languages like Finnish. After all, the
reduced tag sets of English statistical taggers can be viewed as an
attempt to simplify the tag set to refer only to the visible surface
morphemes in a locally constrained context.</p>
</div>
<div id="S1.p3" class="ltx_para">
<p class="ltx_p">There are some initial encouraging results by Lindén and Pirinen
<cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib7" title="" class="ltx_ref">7</a>]</cite> for disambiguating Finnish compounds using
unigram statistics for the parts in a productive compound process.
Unigram statistics for compounds is essentially the same as taking the
most likely morpheme segmentation and the most frequent reading of
each compound word. Similar results for disambiguating compounds using
a slightly different basis for estimating the probabilities have been
demonstrated for German by Schiller <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib11" title="" class="ltx_ref">11</a>]</cite> and by Marek
<cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib9" title="" class="ltx_ref">9</a>]</cite>. These results further encourage us to pursue the
topic of full morphological tagging for a complex language like
Finnish using only a lexicon and unigram statistics for the words and
their compound parts.</p>
</div>
<div id="S1.p4" class="ltx_para">
<p class="ltx_p">In <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib7" title="" class="ltx_ref">7</a>]</cite>, Lindén and Pirinen suggest a method which
essentially requires the building of a full form lexicon and an
estimate for each separate word form. This is not particularly
convenient, instead we introduce a simplified way to weight the
different parts of the lexicon with frequency data from a corpus by
using weighted finite-state transducer calculus. We use the open
source software tools of
<span class="ltx_text ltx_font_smallcaps">HFST<span class="ltx_note ltx_role_footnote"><sup class="ltx_note_mark">2</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">2</sup><a href="https://hfst.sourceforge.net" title="" class="ltx_ref ltx_url ltx_font_typewriter ltx_font_upright">hfst.sourceforge.net</a></span></span></span></span>, which contains
<span class="ltx_text ltx_font_smallcaps">HFST-LexC</span> similar to the Xerox LexC tool
<cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib2" title="" class="ltx_ref">2</a>]</cite>. In addition to compiling LexC-style lexicons,
<span class="ltx_text ltx_font_smallcaps">HFST-LexC</span> has a mechanism for adding weights to compound
parts and morphological analyses. The <span class="ltx_text ltx_font_smallcaps">HFST</span> tools also contain
a set of command line tools that are convenient for creating the final
weighted morphological analyzer using transducer calculus.</p>
</div>
<div id="S1.p5" class="ltx_para">
<p class="ltx_p">We apply the weighted morphological analyzer to the task of
morphologically tagging Finnish text. As expected, it turns out that a
highly inflecting and compounding language with a free word order like
Finnish solves many of its linguistic ambiguities during word
formation. This pays back in the form of 97 % tagger precision using
only a very simple unigram tagger in the form of a weighted
morphological lexicon for the words and word parts that are in the
lexicon. For words that contain unknown parts, the lexicalized
strategy is, however, rather toothless. For such words it seems, we
may, after all, need a traditional guesser and n-gram statistics for
morphological disambiguation.</p>
</div>
<div id="S1.p6" class="ltx_para">
<p class="ltx_p">The remainder of the article is structured as follows. In
Section <a href="#S2" title="2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2</span></a>, we briefly present some aspects of Finnish
morphology that may be problematic for statistical tagging. In
Section <a href="#S3" title="3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3</span></a>, we introduce the probabilistic formulation of how
to weight lexical entries. In Section <a href="#S4" title="4 Data Sets ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">4</span></a>, we introduce the
test and training corpora. In Section <a href="#S5" title="5 Tests and Results ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">5</span></a>, we evaluate the
weighted lexicon on tagging Finnish text. Finally, in
Sections <a href="#S6" title="6 Discussion and Further Research ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">6</span></a> and <a href="#S7" title="7 Conclusions ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">7</span></a>, we discuss the results and draw
the conclusions.</p>
</div>
</section>
<section id="S2" class="ltx_section">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">2 </span>Finnish Morphology</h2>
<div id="S2.p1" class="ltx_para">
<p class="ltx_p">We present some aspects of Finnish inflectional and compounding
morphology that may be problematic for statistical tagging in
Sections <a href="#S2.SS1" title="2.1 Inflection in Finnish ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2.1</span></a> and <a href="#S2.SS2" title="2.2 Compounding in Finnish ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2.2</span></a>. For a more thorough
introduction to Finnish morphology, see Karlsson <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib5" title="" class="ltx_ref">5</a>]</cite>,
and for an implementation of computational morphology, see Koskenniemi
<cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib6" title="" class="ltx_ref">6</a>]</cite>. In Section <a href="#S2.SS2" title="2.2 Compounding in Finnish ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2.2</span></a>, we present an outline
of how to implement the morphology in sublexicons which are useful for
weighting.</p>
</div>
<section id="S2.SS1" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">2.1 </span>Inflection in Finnish</h3>
<div id="S2.SS1.p1" class="ltx_para">
<p class="ltx_p">In Finnish morphology, the inflection of typical nouns produces
several thousands of forms for the productive inflection. E.g. a noun
has more than 12 cases in singular and plural as well as possessive
suffixes and clitic particles resulting in more than 2000 forms for
every noun.</p>
</div>
<div id="S2.SS1.p2" class="ltx_para">
<p class="ltx_p">Mostly the traditional linguistically motivated morphological analysis
of Finnish is based on visible morphemes. However, for illustrational
purposes we will discuss two prototypical cases where the analysis
needs context. One such case is where a possessive suffix overrides
the case ending to create ambiguity: <span class="ltx_text ltx_font_italic">taloni</span> ’my house/of my
house/my houses’, i.e. either <span class="ltx_text ltx_font_italic">talo</span> ’house’ nominative singular,
<span class="ltx_text ltx_font_italic">talon</span> ’of the house’ genitive singular or <span class="ltx_text ltx_font_italic">talot</span> ’houses’
nominative plural followed by a possessive suffix. This ambiguity is
systematic, so either the distinctions can be left out or one can
create a complex underspecified tag <span class="ltx_text ltx_font_italic">+Sg+Nom/+Sg+Gen/+Pl+Nom</span> for
this case.</p>
</div>
<div id="S2.SS1.p3" class="ltx_para">
<p class="ltx_p">Another case, which is common in most languages, is the distinction
between nouns or adjectives and participles of verbs. This often
affects the choice of baseform for the word, i.e. the baseform of
’writing’ is either a verb such as ’write’ or a noun such as
’writing’. In Finnish, we have words like <span class="ltx_text ltx_font_italic">taitava</span> ’skillful
Adjective’ or ’know Verb Present Participle’ and <span class="ltx_text ltx_font_italic">kokenut</span>
’experienced Adjective’ or ’experience Verb Past Participle’. Since
the two readings have different baseforms, it is not possible to
defer the ambiguity to be resolved later by using underspecification.
In some cases, one of the forms is rare and can perhaps be ignored
with a minimal loss of information, but sometimes both occur regularly
and in overlapping contexts, in which case both forms should be
postulated and eventually disambiguated. However, sufficient
information for doing this reliably may not be available before some
degree of syntactic or semantic analysis.</p>
</div>
<div id="S2.SS1.p4" class="ltx_para">
<p class="ltx_p">In Sections <a href="#S5" title="5 Tests and Results ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">5</span></a> and <a href="#S6" title="6 Discussion and Further Research ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">6</span></a>, we will return to the
significance of these problems in Finnish and their impact on the
morphological disambiguation.</p>
</div>
</section>
<section id="S2.SS2" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">2.2 </span>Compounding in Finnish</h3>
<div id="S2.SS2.p1" class="ltx_para">
<p class="ltx_p">Finnish compounding theoretically allows nominal compounds of
arbitrary length to be created from initial parts of certain noun
forms. The final part may be inflected in all possible forms.</p>
</div>
<div id="S2.SS2.p2" class="ltx_para">
<p class="ltx_p">Normal inflected Finnish noun compounds correspond to prepositional
phrases in English, e.g. <span class="ltx_text ltx_font_italic">ostoskeskuksessa</span> ’in the shopping
center’. The morphological analysis in Finnish of the previous phrase
into <span class="ltx_text ltx_font_italic">ostos#keskus+N+Sg+Ine</span> corresponds in English to noun
chunking and case analysis into ’shopping center +N+Sg+Loc:In’.</p>
</div>
<div id="S2.SS2.p3" class="ltx_para">
<p class="ltx_p">In extreme cases, such as the compounds describing ancestors, nouns
are compounded from zero or more of <em class="ltx_emph">isän</em> ‘father
<span class="ltx_text ltx_font_smallcaps">singular genitive</span>’ and <em class="ltx_emph">äidin</em> ‘mother <span class="ltx_text ltx_font_smallcaps">singular
genitive</span>’ and then one of the inflected forms of <em class="ltx_emph">isä</em> or
<em class="ltx_emph">äiti</em> creating forms such as <em class="ltx_emph">äidinisälle</em> ‘to (maternal)
grandfather’ or <em class="ltx_emph">isänisänisänisä</em> ‘great great grandfather’. As
for the potential ambiguity, Finnish also has the noun <em class="ltx_emph">nisä</em>
‘udder’, which creates ambiguity for any paternal grandfather,
e.g. <em class="ltx_emph">isän#isän#isän#isä</em>, <em class="ltx_emph">isän#isä#nisän#isä</em>,
<em class="ltx_emph">isä#nisä#nisä#nisä</em>, …</p>
</div>
<div id="S2.SS2.p4" class="ltx_para">
<p class="ltx_p">Finnish compounding also includes forms of compounding where all parts
of the word are inflected in the same form, but this is limited to a
small fraction of adjective initial compounds and to the numbers if
they are spelled out with letters. In addition, some inflected verb
forms may appear as parts of compounds. These are much more rare than
nominal compounds <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib4" title="" class="ltx_ref">4</a>]</cite> so they do not interfere with the
regular compounding.</p>
</div>
</section>
<section id="S2.SS3" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">2.3 </span>Finnish Computational Morphology</h3>
<div id="S2.SS3.p1" class="ltx_para">
<p class="ltx_p">Pirinen <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib10" title="" class="ltx_ref">10</a>]</cite> presented an open source implementation of
a finite state morphological analyzer for Finnish, which has been
reimplemented with the <span class="ltx_text ltx_font_smallcaps">HFST</span> tools and extended with data
collected and classified by Listenmaa <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib8" title="" class="ltx_ref">8</a>]</cite>. We use the
reimplemented and extended version as our unweighted lexicon.
Pirinen’s analyzer has a fully productive noun compounding
mechanism. Fully productive noun compounding means that it allows
compounds of arbitrary length with any combination of nominative
singulars, genitive singulars, or genitive plurals in the initial part
and any inflected form of a noun as the final part.</p>
</div>
<div id="S2.SS3.p2" class="ltx_para">
<p class="ltx_p">The morphotactic combination of morphemes is achieved by combining
sublexicons as defined in <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib2" title="" class="ltx_ref">2</a>]</cite>. We use the open source
software called <span class="ltx_text ltx_font_smallcaps">HFST-LexC</span> with a similar interface as the
Xerox LexC tool. The interested reader is referred to
<cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib2" title="" class="ltx_ref">2</a>]</cite> for an exposition of the LexC syntax. The
<span class="ltx_text ltx_font_smallcaps">HFST-LexC</span> tool extends the syntax with support for adding
weights on the lexical entries.</p>
</div>
<div id="S2.SS3.p3" class="ltx_para">
<p class="ltx_p">We note that the noun compounding can be decomposed into two
concatenatable lexicons separated by a word boundary marker, i.e. any
number of noun prefixes <em class="ltx_emph">CompoundNonFinalNoun</em><math id="S2.SS3.p3.m1" class="ltx_Math" alttext="{}^{*}" display="inline"><msup><mi></mi><mo>*</mo></msup></math> in
Figure <a href="#S2.F1" title="Figure 1 ‣ 2.3 Finnish Computational Morphology ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">1</span></a> separated by ’#’ and from the inflected
noun forms <em class="ltx_emph">CompoundFinalNoun</em> in
Figure <a href="#S2.F2" title="Figure 2 ‣ 2.3 Finnish Computational Morphology ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2</span></a>. Similar decompositions can be achieved
for other parts of speech as needed. For a further discussion of the
structure of the lexicon, see <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib7" title="" class="ltx_ref">7</a>]</cite>.</p>
</div>
<figure id="S2.F1" class="ltx_figure"><pre class="ltx_verbatim ltx_centering ltx_font_typewriter" style="font-size:70%;">
LEXICON Root
## CompoundNonFinalNoun ;
## #;
LEXICON Compound
#:0 CompoundNonFinalNoun;
#:0 #;
LEXICON CompoundNonFinalNoun
isä Compound "weight: 0, gloss: father" ;
isän Compound "weight: 0, gloss: father's" ;
äiti Compound "weight: 0, gloss: mother" ;
äidin Compound "weight: 0, gloss: mother's" ;
</pre>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 1: </span>Unweighted fragment for
{<em class="ltx_emph">CompoundNonFinalNoun</em>}<math id="S2.F1.m2" class="ltx_Math" alttext="{}^{*}" display="inline"><msup><mi></mi><mo>*</mo></msup></math> i.e. <em class="ltx_emph">noun
prefixes</em>.</figcaption>
</figure>
<figure id="S2.F2" class="ltx_figure"><pre class="ltx_verbatim ltx_centering ltx_font_typewriter" style="font-size:70%;">
LEXICON Root
CompoundFinalNoun ;
LEXICON CompoundFinalNoun
isä:isä+sg+nom ## "weight: 0, gloss: father" ;
isän:isä+sg+gen ## "weight: 0, gloss: father's" ;
isälle:isä+sg+all ## "weight: 0, gloss: to the father" ;
LEXICON ##
## # ;
</pre>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 2: </span>Unweighted fragment for <em class="ltx_emph">CompoundFinalNoun</em>, i.e.
<em class="ltx_emph">noun forms</em>.</figcaption>
</figure>
</section>
</section>
<section id="S3" class="ltx_section">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">3 </span>Methodology</h2>
<div id="S3.p1" class="ltx_para">
<p class="ltx_p">Assume that we want to know the probability of a morphological
analysis with a morpheme segmentation <em class="ltx_emph">A</em> given the token
<em class="ltx_emph">a</em>, i.e. <math id="S3.p1.m1" class="ltx_Math" alttext="\mathrm{P}(A|a)" display="inline"><mrow><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">|</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow></math>. According to Bayes&#8217; rule, we get
Equation <a href="#S3.E1" title="(1) ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">1</span></a>.</p>
</div>
<div id="S3.p2" class="ltx_para">
<table id="S3.E1" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E1.m1" class="ltx_Math" alttext="\mathrm{P}(A|a)=\mathrm{P}(A,a)/\mathrm{P}(a)=\mathrm{P}(a|A)\mathrm{P}(A)/%
\mathrm{P}(a)" display="block"><mrow><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">|</mo><mi>a</mi><mo stretchy="false">)</mo></mrow><mo>=</mo><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo>,</mo><mi>a</mi><mo stretchy="false">)</mo></mrow><mo>/</mo><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow><mo>=</mo><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">|</mo><mi>A</mi><mo stretchy="false">)</mo></mrow><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">)</mo></mrow><mo>/</mo><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(1)</span></td>
</tr>
</table>
</div>
<div id="S3.p3" class="ltx_para">
<p class="ltx_p">We wish to retain only the most likely analysis and its segmentation
<em class="ltx_emph">A</em>. We know that <math id="S3.p3.m1" class="ltx_Math" alttext="\mathrm{P}(a|A)" display="inline"><mrow><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">|</mo><mi>A</mi><mo stretchy="false">)</mo></mrow></mrow></math> is almost always 1, i.e. a
word form is known when its analysis is given. Additionally,
<em class="ltx_emph">P(a)</em> is constant during the maximization, so the expression
simplifies to finding the most likely global analysis <em class="ltx_emph">A</em> as
shown by Equation <a href="#S3.E2" title="(2) ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2</span></a>, i.e. we only need to estimate the
output language model.</p>
</div>
<div id="S3.p4" class="ltx_para">
<table id="S3.E2" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E2.m1" class="ltx_Math" alttext="\arg\max_{A}\mathrm{P}(A|a)=\arg\max_{A}\mathrm{P}(a|A)\mathrm{P}(A)/\mathrm{P%
}(a)=\arg\max_{A}\mathrm{P}(A)" display="block"><mrow><mi>arg</mi><munder><mi>max</mi><mi>A</mi></munder><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">|</mo><mi>a</mi><mo stretchy="false">)</mo></mrow><mo>=</mo><mi>arg</mi><munder><mi>max</mi><mi>A</mi></munder><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">|</mo><mi>A</mi><mo stretchy="false">)</mo></mrow><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">)</mo></mrow><mo>/</mo><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow><mo>=</mo><mi>arg</mi><munder><mi>max</mi><mi>A</mi></munder><mi mathvariant="normal">P</mi><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">)</mo></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(2)</span></td>
</tr>
</table>
</div>
<div id="S3.p5" class="ltx_para">
<p class="ltx_p">In order to find the most likely segmentation of <em class="ltx_emph">A</em>, we can make
the additional assumption that the probability <em class="ltx_emph">P(A)</em> is
proportional to the product of the probabilities <math id="S3.p5.m1" class="ltx_Math" alttext="\mathrm{P}(s_{i})" display="inline"><mrow><mi mathvariant="normal">P</mi><mo></mo><mrow><mo stretchy="false">(</mo><msub><mi>s</mi><mi>i</mi></msub><mo stretchy="false">)</mo></mrow></mrow></math> of
the segments of <em class="ltx_emph">A</em>, where <math id="S3.p5.m2" class="ltx_Math" alttext="A=s_{1}s_{2}...s_{n}" display="inline"><mrow><mi>A</mi><mo>=</mo><mrow><msub><mi>s</mi><mn>1</mn></msub><mo></mo><msub><mi>s</mi><mn>2</mn></msub><mo></mo><mi mathvariant="normal">…</mi><mo></mo><msub><mi>s</mi><mi>n</mi></msub></mrow></mrow></math>, defined by
Equation <a href="#S3.E3" title="(3) ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3</span></a>. This assumption based on a unigram
language model of compounding has been demonstrated by Lindén and
Pirinen <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib7" title="" class="ltx_ref">7</a>]</cite> to work well in practice.</p>
</div>
<div id="S3.p6" class="ltx_para">
<table id="S3.E3" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E3.m1" class="ltx_Math" alttext="\mathrm{P}(A)\propto\prod_{s_{i}}\mathrm{P}(s_{i})" display="block"><mrow><mrow><mi mathvariant="normal">P</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>A</mi><mo stretchy="false">)</mo></mrow></mrow><mo>∝</mo><mrow><munder><mo largeop="true" movablelimits="false" symmetric="true">∏</mo><msub><mi>s</mi><mi>i</mi></msub></munder><mrow><mi mathvariant="normal">P</mi><mo></mo><mrow><mo stretchy="false">(</mo><msub><mi>s</mi><mi>i</mi></msub><mo stretchy="false">)</mo></mrow></mrow></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(3)</span></td>
</tr>
</table>
</div>
<section id="S3.SS1" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">3.1 </span>Estimating probabilities</h3>
<div id="S3.SS1.p1" class="ltx_para">
<p class="ltx_p">The estimated probability of a token, <em class="ltx_emph">a</em>, to occur in the corpus
is proportional to the count, <em class="ltx_emph">c(a)</em>, divided by the corpus size,
<em class="ltx_emph">cs</em>. The probability <em class="ltx_emph">p(a)</em> of a token in the corpus is
defined by Equation <a href="#S3.E4" title="(4) ‣ 3.1 Estimating probabilities ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">4</span></a>. We also note that the corpus
estimate for <em class="ltx_emph">p(a)</em> is in fact an estimate of the sum of the
probabilities of all the possible analyses and segmentations of
<em class="ltx_emph">a</em> in the corpus.</p>
</div>
<div id="S3.SS1.p2" class="ltx_para">
<table id="S3.E4" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E4.m1" class="ltx_Math" alttext="\mathrm{p}(a)=\mathrm{c}(a)/\mathrm{cs}" display="block"><mrow><mrow><mi mathvariant="normal">p</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow><mo>=</mo><mrow><mrow><mi mathvariant="normal">c</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow><mo>/</mo><mi>cs</mi></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(4)</span></td>
</tr>
</table>
</div>
<div id="S3.SS1.p3" class="ltx_para">
<p class="ltx_p">Tokens <em class="ltx_emph">x</em> known to the original lexicon but unseen in the corpus
need to be assigned a small probability mass different from 0, so they
get <em class="ltx_emph">c(x) = 1</em>, i.e. we define the count of a token as its corpus
frequency plus 1 as in Equation <a href="#S3.E5" title="(5) ‣ 3.1 Estimating probabilities ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">5</span></a>, also known as Laplace
smoothing.</p>
</div>
<div id="S3.SS1.p4" class="ltx_para">
<table id="S3.E5" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E5.m1" class="ltx_Math" alttext="\mathrm{c}(a)=1+\mathrm{frequency}(a)" display="block"><mrow><mrow><mi mathvariant="normal">c</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow><mo>=</mo><mrow><mn>1</mn><mo>+</mo><mrow><mi>frequency</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(5)</span></td>
</tr>
</table>
</div>
</section>
<section id="S3.SS2" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">3.2 </span>Weighting the Lexicon</h3>
<div id="S3.SS2.p1" class="ltx_para">
<p class="ltx_p">In order to use the probabilities as weights in the lexicon, we
implement them in the tropical semiring, which means that we use the
negative log-probabilities as defined by Equation <a href="#S3.E6" title="(6) ‣ 3.2 Weighting the Lexicon ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">6</span></a>.</p>
</div>
<div id="S3.SS2.p2" class="ltx_para">
<table id="S3.E6" class="ltx_equation ltx_eqn_table">
<tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_eqn_cell ltx_align_center"><math id="S3.E6.m1" class="ltx_Math" alttext="\mathrm{w}(a)=-\mathrm{log}(p(a))" display="block"><mrow><mrow><mi mathvariant="normal">w</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow><mo>=</mo><mrow><mo>-</mo><mrow><mi>log</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mi>p</mi><mo></mo><mrow><mo stretchy="false">(</mo><mi>a</mi><mo stretchy="false">)</mo></mrow></mrow><mo stretchy="false">)</mo></mrow></mrow></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(6)</span></td>
</tr>
</table>
</div>
<div id="S3.SS2.p3" class="ltx_para">
<p class="ltx_p">In the tropical semiring, probability multiplication corresponds to
weight addition and probability addition corresponds to weight
maximization. In <span class="ltx_text ltx_font_smallcaps">HFST-LexC</span>, we use OpenFST <cite class="ltx_cite ltx_citemacro_cite">[<a href="#bib.bib1" title="" class="ltx_ref">1</a>]</cite> as
the software library for weighted finite-state transducers.</p>
</div>
<figure id="S3.F3" class="ltx_figure"><pre class="ltx_verbatim ltx_centering ltx_font_typewriter" style="font-size:70%;">
LEXICON Root
## CompoundNonFinalNoun ;
## CompoundFinalNoun ;
LEXICON Compound
0:# CompoundNonFinalNoun ;
0:# CompoundFinalNoun ;
LEXICON CompoundNonFinalNoun
isä Compound "weight: -log(c(isä)/cs)" ;
isän Compound "weight: -log(c(isän)/cs)" ;
äiti Compound "weight: -log(c(äiti)/cs)" ;
äidin Compound "weight: -log(c(äidin)/cs)" ;
LEXICON CompoundFinalNoun
isä+sg+nom ## "weight:-log(c(isä+sg+nom)/cs)" ;
isä+sg+gen ## "weight:-log(c(isä+sg+gen)/cs)" ;
isä+sg+all ## "weight:-log(c(isä+sg+all)/cs)" ;
isä+pl+ins ## "weight:-log(c(isä+pl+ins)/cs)" ;
LEXICON ##
## # ;
</pre>
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_figure">Figure 3: </span>Structure weighting scheme using token penalties on the
output language. Note that the functions in the comment field are
placeholders for the actual weights.</figcaption>
</figure>
<div id="S3.SS2.p4" class="ltx_para">
<p class="ltx_p">For short, we call our unweighted compounding lexicon, <em class="ltx_emph">Lex</em>, and
the decomposed noun compounding lexicon parts, i.e. the noun prefixes
<em class="ltx_emph">CompoundNonFinalNoun</em><math id="S3.SS2.p4.m1" class="ltx_Math" alttext="{}^{*}" display="inline"><msup><mi></mi><mo>*</mo></msup></math> in Figure <a href="#S2.F1" title="Figure 1 ‣ 2.3 Finnish Computational Morphology ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">1</span></a> and
the inflected noun forms <em class="ltx_emph">CompoundFinalNoun</em> in
Figure <a href="#S2.F2" title="Figure 2 ‣ 2.3 Finnish Computational Morphology ‣ 2 Finnish Morphology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2</span></a>, <em class="ltx_emph">Pref</em> and <em class="ltx_emph">Final</em>,
respectively.</p>
</div>
<div id="S3.SS2.p5" class="ltx_para">
<p class="ltx_p">For an illustration of how the weighting scheme can be implemented in
the weighted output language model, <math id="S3.SS2.p5.m1" class="ltx_Math" alttext="WLex" display="inline"><mrow><mi>W</mi><mo></mo><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mi>x</mi></mrow></math>, of the noun compounding
lexicon, see Figure <a href="#S3.F3" title="Figure 3 ‣ 3.2 Weighting the Lexicon ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3</span></a>. There is an obvious extension
of the weighting scheme to the output models of the decomposed
unweighted lexicons, <em class="ltx_emph">Pref</em> and <em class="ltx_emph">Final</em>. We call these
weighted output language models <em class="ltx_emph">WPref</em> and <em class="ltx_emph">WFinal</em>,
respectively.</p>
</div>
</section>
<section id="S3.SS3" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">3.3 </span>Back Off Model</h3>
<div id="S3.SS3.p1" class="ltx_para">
<p class="ltx_p">The original lexicon, <math id="S3.SS3.p1.m1" class="ltx_Math" alttext="Lex" display="inline"><mrow><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mi>x</mi></mrow></math>, can be weighted by composing it with the
weighted output language, <math id="S3.SS3.p1.m2" class="ltx_Math" alttext="WLex" display="inline"><mrow><mi>W</mi><mo></mo><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mi>x</mi></mrow></math>, as in
Equation <a href="#S3.E7" title="(7) ‣ 3.3 Back Off Model ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">7</span></a>. However, there are a number of word
forms and compound segments in the lexicon, for which no estimate is
available in the corpus. We wish to assign a large weight to these
forms and segments, i.e. a weight <em class="ltx_emph">M</em> which is greater than any
of the weights estimated from the corpus, e.g. <math id="S3.SS3.p1.m3" class="ltx_Math" alttext="M=log(1+\mathrm{cs})" display="inline"><mrow><mi>M</mi><mo>=</mo><mrow><mi>l</mi><mo></mo><mi>o</mi><mo></mo><mi>g</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mn>1</mn><mo>+</mo><mi>cs</mi></mrow><mo stretchy="false">)</mo></mrow></mrow></mrow></math>. To calculate the missing words, we first use the
homomorphism <math id="S3.SS3.p1.m4" class="ltx_Math" alttext="uw" display="inline"><mrow><mi>u</mi><mo></mo><mi>w</mi></mrow></math> to map the <math id="S3.SS3.p1.m5" class="ltx_Math" alttext="WPref" display="inline"><mrow><mi>W</mi><mo></mo><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mi>f</mi></mrow></math> to an unweighted automaton, which we
subtract from <math id="S3.SS3.p1.m6" class="ltx_Math" alttext="\Sigma^{*}" display="inline"><msup><mi mathvariant="normal">Σ</mi><mo>*</mo></msup></math> and give the output model the final weight
<math id="S3.SS3.p1.m7" class="ltx_Math" alttext="M" display="inline"><mi>M</mi></math> using the homomorphism <math id="S3.SS3.p1.m8" class="ltx_Math" alttext="mw" display="inline"><mrow><mi>m</mi><mo></mo><mi>w</mi></mrow></math>.</p>
</div>
<div id="S3.SS3.p2" class="ltx_para">
<p class="ltx_p">We create the following new sublexicons using automata difference and
composition with the original decomposed transducers in
Equations <a href="#S3.E8" title="(8) ‣ 3.3 Back Off Model ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">8</span></a> and <a href="#S3.E9" title="(9) ‣ 3.3 Back Off Model ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">9</span></a>.</p>
</div>
<div id="S3.SS3.p3" class="ltx_para">
<table id="A0.EGx1" class="ltx_equationgroup ltx_eqn_eqnarray ltx_eqn_table">
<tbody id="S3.E7"><tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_td ltx_align_right ltx_eqn_cell"><math id="S3.E7.m1" class="ltx_Math" alttext="\displaystyle KnownAndSeenWords" display="inline"><mrow><mi>K</mi><mo></mo><mi>n</mi><mo></mo><mi>o</mi><mo></mo><mi>w</mi><mo></mo><mi>n</mi><mo></mo><mi>A</mi><mo></mo><mi>n</mi><mo></mo><mi>d</mi><mo></mo><mi>S</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>W</mi><mo></mo><mi>o</mi><mo></mo><mi>r</mi><mo></mo><mi>d</mi><mo></mo><mi>s</mi></mrow></math></td>
<td class="ltx_td ltx_align_center ltx_eqn_cell"><math id="S3.E7.m2" class="ltx_Math" alttext="\displaystyle=" display="inline"><mo>=</mo></math></td>
<td class="ltx_td ltx_align_left ltx_eqn_cell"><math id="S3.E7.m3" class="ltx_Math" alttext="\displaystyle Lex~{}o~{}WLex" display="inline"><mrow><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mpadded width="+3.3pt"><mi>x</mi></mpadded><mo></mo><mpadded width="+3.3pt"><mi>o</mi></mpadded><mo></mo><mi>W</mi><mo></mo><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mi>x</mi></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(7)</span></td>
</tr></tbody>
<tbody id="S3.E8"><tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_td ltx_align_right ltx_eqn_cell"><math id="S3.E8.m1" class="ltx_Math" alttext="\displaystyle MaxUnseenPref" display="inline"><mrow><mi>M</mi><mo></mo><mi>a</mi><mo></mo><mi>x</mi><mo></mo><mi>U</mi><mo></mo><mi>n</mi><mo></mo><mi>s</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mi>f</mi></mrow></math></td>
<td class="ltx_td ltx_align_center ltx_eqn_cell"><math id="S3.E8.m2" class="ltx_Math" alttext="\displaystyle=" display="inline"><mo>=</mo></math></td>
<td class="ltx_td ltx_align_left ltx_eqn_cell"><math id="S3.E8.m3" class="ltx_Math" alttext="\displaystyle Pref~{}o~{}(mw(\Sigma^{*}-uw(WPref)))" display="inline"><mrow><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mpadded width="+3.3pt"><mi>f</mi></mpadded><mo></mo><mpadded width="+3.3pt"><mi>o</mi></mpadded><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mi>m</mi><mo></mo><mi>w</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><msup><mi mathvariant="normal">Σ</mi><mo>*</mo></msup><mo>-</mo><mrow><mi>u</mi><mo></mo><mi>w</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mi>W</mi><mo></mo><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mi>f</mi></mrow><mo stretchy="false">)</mo></mrow></mrow></mrow><mo stretchy="false">)</mo></mrow></mrow><mo stretchy="false">)</mo></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(8)</span></td>
</tr></tbody>
<tbody id="S3.E9"><tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_td ltx_align_right ltx_eqn_cell"><math id="S3.E9.m1" class="ltx_Math" alttext="\displaystyle MaxUnseenFinal" display="inline"><mrow><mi>M</mi><mo></mo><mi>a</mi><mo></mo><mi>x</mi><mo></mo><mi>U</mi><mo></mo><mi>n</mi><mo></mo><mi>s</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>F</mi><mo></mo><mi>i</mi><mo></mo><mi>n</mi><mo></mo><mi>a</mi><mo></mo><mi>l</mi></mrow></math></td>
<td class="ltx_td ltx_align_center ltx_eqn_cell"><math id="S3.E9.m2" class="ltx_Math" alttext="\displaystyle=" display="inline"><mo>=</mo></math></td>
<td class="ltx_td ltx_align_left ltx_eqn_cell"><math id="S3.E9.m3" class="ltx_Math" alttext="\displaystyle Final~{}o~{}(mw(\Sigma^{*}-uw(WFinal)))" display="inline"><mrow><mi>F</mi><mo></mo><mi>i</mi><mo></mo><mi>n</mi><mo></mo><mi>a</mi><mo></mo><mpadded width="+3.3pt"><mi>l</mi></mpadded><mo></mo><mpadded width="+3.3pt"><mi>o</mi></mpadded><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mi>m</mi><mo></mo><mi>w</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><msup><mi mathvariant="normal">Σ</mi><mo>*</mo></msup><mo>-</mo><mrow><mi>u</mi><mo></mo><mi>w</mi><mo></mo><mrow><mo stretchy="false">(</mo><mrow><mi>W</mi><mo></mo><mi>F</mi><mo></mo><mi>i</mi><mo></mo><mi>n</mi><mo></mo><mi>a</mi><mo></mo><mi>l</mi></mrow><mo stretchy="false">)</mo></mrow></mrow></mrow><mo stretchy="false">)</mo></mrow></mrow><mo stretchy="false">)</mo></mrow></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(9)</span></td>
</tr></tbody>
</table>
</div>
<div id="S3.SS3.p4" class="ltx_para">
<p class="ltx_p">These sublexicons can be combined as specified in
Equation <a href="#S3.Ex1" title="3.3 Back Off Model ‣ 3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3.3</span></a> to cover the whole of the original
lexicon.</p>
</div>
<div id="S3.SS3.p5" class="ltx_para">
<table id="A0.EGx2" class="ltx_equationgroup ltx_eqn_eqnarray ltx_eqn_table">
<tr id="S3.Ex1" class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_td ltx_align_right ltx_eqn_cell"><math id="S3.Ex1.m1" class="ltx_Math" alttext="\displaystyle WeightedLexicon~{}=~{}KnownAndSeenWords~{}|~{}Pref~{}MaxUnseenFinal" display="inline"><mrow><mi>W</mi><mi>e</mi><mi>i</mi><mi>g</mi><mi>h</mi><mi>t</mi><mi>e</mi><mi>d</mi><mi>L</mi><mi>e</mi><mi>x</mi><mi>i</mi><mi>c</mi><mi>o</mi><mpadded width="+3.3pt"><mi>n</mi></mpadded><mo rspace="5.8pt">=</mo><mi>K</mi><mi>n</mi><mi>o</mi><mi>w</mi><mi>n</mi><mi>A</mi><mi>n</mi><mi>d</mi><mi>S</mi><mi>e</mi><mi>e</mi><mi>n</mi><mi>W</mi><mi>o</mi><mi>r</mi><mi>d</mi><mpadded width="+3.3pt"><mi>s</mi></mpadded><mo rspace="5.8pt" stretchy="false">|</mo><mi>P</mi><mi>r</mi><mi>e</mi><mpadded width="+3.3pt"><mi>f</mi></mpadded><mi>M</mi><mi>a</mi><mi>x</mi><mi>U</mi><mi>n</mi><mi>s</mi><mi>e</mi><mi>e</mi><mi>n</mi><mi>F</mi><mi>i</mi><mi>n</mi><mi>a</mi><mi>l</mi></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
</tr>
<tbody id="S3.E10"><tr class="ltx_equation ltx_eqn_row ltx_align_baseline">
<td class="ltx_eqn_cell ltx_eqn_center_padleft"></td>
<td class="ltx_td ltx_align_right ltx_eqn_cell"><math id="S3.E10.m1" class="ltx_Math" alttext="\displaystyle|~{}MaxUnseenPref~{}Final~{}|~{}MaxUnseenPref~{}MaxUnseenFinal" display="inline"><mrow><mrow><mo rspace="5.8pt" stretchy="false">|</mo><mrow><mi>M</mi><mo></mo><mi>a</mi><mo></mo><mi>x</mi><mo></mo><mi>U</mi><mo></mo><mi>n</mi><mo></mo><mi>s</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mpadded width="+3.3pt"><mi>f</mi></mpadded><mo></mo><mi>F</mi><mo></mo><mi>i</mi><mo></mo><mi>n</mi><mo></mo><mi>a</mi><mo></mo><mpadded width="+3.3pt"><mi>l</mi></mpadded></mrow><mo rspace="5.8pt" stretchy="false">|</mo></mrow><mo></mo><mi>M</mi><mo></mo><mi>a</mi><mo></mo><mi>x</mi><mo></mo><mi>U</mi><mo></mo><mi>n</mi><mo></mo><mi>s</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>P</mi><mo></mo><mi>r</mi><mo></mo><mi>e</mi><mo></mo><mpadded width="+3.3pt"><mi>f</mi></mpadded><mo></mo><mi>M</mi><mo></mo><mi>a</mi><mo></mo><mi>x</mi><mo></mo><mi>U</mi><mo></mo><mi>n</mi><mo></mo><mi>s</mi><mo></mo><mi>e</mi><mo></mo><mi>e</mi><mo></mo><mi>n</mi><mo></mo><mi>F</mi><mo></mo><mi>i</mi><mo></mo><mi>n</mi><mo></mo><mi>a</mi><mo></mo><mi>l</mi></mrow></math></td>
<td class="ltx_eqn_cell ltx_eqn_center_padright"></td>
<td rowspan="1" class="ltx_eqn_cell ltx_eqn_eqno ltx_align_middle ltx_align_right"><span class="ltx_tag ltx_tag_equation ltx_align_right">(10)</span></td>
</tr></tbody>
</table>
</div>
<div id="S3.SS3.p6" class="ltx_para">
<p class="ltx_p">The <math id="S3.SS3.p6.m1" class="ltx_Math" alttext="WeightedLexicon" display="inline"><mrow><mi>W</mi><mo></mo><mi>e</mi><mo></mo><mi>i</mi><mo></mo><mi>g</mi><mo></mo><mi>h</mi><mo></mo><mi>t</mi><mo></mo><mi>e</mi><mo></mo><mi>d</mi><mo></mo><mi>L</mi><mo></mo><mi>e</mi><mo></mo><mi>x</mi><mo></mo><mi>i</mi><mo></mo><mi>c</mi><mo></mo><mi>o</mi><mo></mo><mi>n</mi></mrow></math> will assign the lowest corpus weight to the most
likely reading and the highest corpus weight to the most unlikely
reading of the original lexical transducer.</p>
</div>
</section>
</section>
<section id="S4" class="ltx_section">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">4 </span>Data Sets</h2>
<div id="S4.p1" class="ltx_para">
<p class="ltx_p">As training and test data, we use a compilation of three years,
1995-1997, of daily issues of Helsingin Sanomat, which is the most
widespread Finnish newspaper. We disambiguated the corpus using
Machinese for Finnish<span class="ltx_note ltx_role_footnote"><sup class="ltx_note_mark">3</sup><span class="ltx_note_outer"><span class="ltx_note_content"><sup class="ltx_note_mark">3</sup>Machinese is available from Connexor
Ltd., www.connexor.com</span></span></span> which provided one reading in context for
each word using syntactic parsing. This provided us with a
mechanically derived standard and not a human controlled gold
standard.</p>
</div>
<section id="S4.SS1" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">4.1 </span>Training Data</h3>
<div id="S4.SS1.p1" class="ltx_para">
<p class="ltx_p">The training data actually spanned 2.5 years with 1995 and 1996 of
equal size and 1997 only half of this. This collection contained
approximately 2.4 million different words, i.e. types, corresponding
to approximately 70 million words of Finnish, i.e. tokens, divided
into 29 million tokens for 1995, 29 for 1996 and 11 for 1997. We used
the training data to count the non-compound tokens and their analyses.</p>
</div>
</section>
<section id="S4.SS2" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">4.2 </span>Test Data</h3>
<div id="S4.SS2.p1" class="ltx_para">
<p class="ltx_p">From the three years of training data we extracted running text from
comparable sections of the newspaper data. We chose articles from the
section reporting on general news with normal running text (as a
contrast to e.g. the economy or sports section with significant
amounts of numbers and tables). The extracted test data sets contained
118 838, 134 837 and 193 733 tokens for 1995, 1996 and 1997,
respectively. We used the test data to verify the result of the
disambiguation.</p>
</div>
</section>
<section id="S4.SS3" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">4.3 </span>Baseline</h3>
<div id="S4.SS3.p1" class="ltx_para">
<p class="ltx_p">As a baseline method, we use the training data as such to create
statistical unigram taggers as outlined in Section <a href="#S3" title="3 Methodology ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3</span></a>. In
Table <a href="#S4.T1" title="Table 1 ‣ 4.3 Baseline ‣ 4 Data Sets ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">1</span></a>, we show the baseline result for the test
data samples with a given training data tagger, the number of tokens
with 1st correct reading, the number of tokens with some other correct
reading, the number of tokens with some readings but no correct and
the number of tokens with no reading.</p>
</div>
<figure id="S4.T1" class="ltx_table">
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_table">Table 1: </span>Baseline of the tagger test data.
</figcaption>
<table class="ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle">
<tbody class="ltx_tbody">
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Train</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Test</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S4.T1.m1" class="ltx_Math" alttext="1^{st}" display="inline"><msup><mn mathsize="70%">1</mn><mrow><mi mathsize="70%">s</mi><mo></mo><mi mathsize="70%">t</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S4.T1.m2" class="ltx_Math" alttext="n^{th}" display="inline"><msup><mi mathsize="70%">n</mi><mrow><mi mathsize="70%">t</mi><mo></mo><mi mathsize="70%">h</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Comment</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Analysis (%)</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">96.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">3.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">92.2</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">4.1</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">91.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">4.6</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">91.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">3.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">4.5</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">96.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">92.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.2</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">4.1</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">89.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">3.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.5</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">6.6</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">90.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.2</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">6.2</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">96.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_b"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
</tbody>
</table>
</figure>
</section>
</section>
<section id="S5" class="ltx_section">
<h2 class="ltx_title ltx_title_section">
<span class="ltx_tag ltx_tag_section">5 </span>Tests and Results</h2>
<div id="S5.p1" class="ltx_para">
<p class="ltx_p">We created two versions of the weighted lexicon for disambiguating
running text. One weights the lexicon using the current corpus and
tests the result using only the weighted lexicon data. The second test
adds the baseline tagger to the lexicon in order to ensure some
additional domain specific data for lack of a guesser.</p>
</div>
<section id="S5.SS1" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">5.1 </span>Lexicon-based Unigram Tagger</h3>
<div id="S5.SS1.p1" class="ltx_para">
<p class="ltx_p">We did our first tagging experiment using a full year of newspaper
articles as training data for the lexicon and testing with the test
data from the other two years. The first correct results are
consistently at 97 % of the words with some correct
analysis. However, the coverage is totally dependent on the fairly
restricted lexicon as shown in Table <a href="#S5.T2" title="Table 2 ‣ 5.1 Lexicon-based Unigram Tagger ‣ 5 Tests and Results ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">2</span></a>. We also
include the results for testing and training on the same year as an
upper limit or reference.</p>
</div>
<figure id="S5.T2" class="ltx_table">
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_table">Table 2: </span>Lexicon-based unigram tagger results for Finnish.
</figcaption>
<table class="ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle">
<tbody class="ltx_tbody">
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Train</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Test</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S5.T2.m1" class="ltx_Math" alttext="1^{st}" display="inline"><msup><mn mathsize="70%">1</mn><mrow><mi mathsize="70%">s</mi><mo></mo><mi mathsize="70%">t</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S5.T2.m2" class="ltx_Math" alttext="n^{th}" display="inline"><msup><mi mathsize="70%">n</mi><mrow><mi mathsize="70%">t</mi><mo></mo><mi mathsize="70%">h</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Comment</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Analysis (%)</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">68.2</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1.2</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">18.5</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.3</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">11.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.5</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">67.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">18.5</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.3</span></td>
<td class="ltx_td ltx_align_center"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">11.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.5</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">67.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">18.5</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.4</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">12.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.3</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">69.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">11.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">17.5</span></td>
<td class="ltx_td ltx_align_center ltx_border_b"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
</tbody>
</table>
</figure>
</section>
<section id="S5.SS2" class="ltx_subsection">
<h3 class="ltx_title ltx_title_subsection">
<span class="ltx_tag ltx_tag_subsection">5.2 </span>Extended Lexicon-based Unigram Tagger</h3>
<div id="S5.SS2.p1" class="ltx_para">
<p class="ltx_p">We did our second tagging experiment as the first with the addition of
using the full year of newspaper data for extending the lexicon.
Again, we tested with the test data from the other two years. The
first correct results are consistently at 98 % of the words with some
correct analysis and the coverage is now considerably better as shown
in Table <a href="#S5.T3" title="Table 3 ‣ 5.2 Extended Lexicon-based Unigram Tagger ‣ 5 Tests and Results ‣ Weighting Finite-State Morphological Analyzers using HFST Tools The official publication was in the Proceedings of FSMNLP 2009." class="ltx_ref"><span class="ltx_text ltx_ref_tag">3</span></a>. We also include the results for
testing and training on the same year as an upper limit or reference.</p>
</div>
<figure id="S5.T3" class="ltx_table">
<figcaption class="ltx_caption ltx_centering"><span class="ltx_tag ltx_tag_table">Table 3: </span>Extended lexicon-based unigram tagger results for Finnish.
</figcaption>
<table class="ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle">
<tbody class="ltx_tbody">
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Train</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Test</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S5.T3.m1" class="ltx_Math" alttext="1^{st}" display="inline"><msup><mn mathsize="70%">1</mn><mrow><mi mathsize="70%">s</mi><mo></mo><mi mathsize="70%">t</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t">
<span class="ltx_text" style="font-size:70%;"> </span><math id="S5.T3.m2" class="ltx_Math" alttext="n^{th}" display="inline"><msup><mi mathsize="70%">n</mi><mrow><mi mathsize="70%">t</mi><mo></mo><mi mathsize="70%">h</mi></mrow></msup></math>
</td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;"> No</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Comment</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Year</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Correct (%)</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;"> Analysis (%)</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">95.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">4.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_t"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">93.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">4.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">2.0</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">93.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">4.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">2.3</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">92.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">4.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">0.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">2.2</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">96.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">93.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">1.9</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1995</span></th>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">91.6</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">4.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">1.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_r ltx_border_t"><span class="ltx_text" style="font-size:70%;">3.2</span></td>
<td class="ltx_td ltx_border_t"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r"><span class="ltx_text" style="font-size:70%;">1996</span></th>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">92.1</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.9</span></td>
<td class="ltx_td ltx_align_center ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.1</span></td>
<td class="ltx_td"></td>
</tr>
<tr class="ltx_tr">
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<th class="ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">1997</span></th>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">96.3</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">3.7</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_b ltx_border_r"><span class="ltx_text" style="font-size:70%;">0.0</span></td>
<td class="ltx_td ltx_align_center ltx_border_b"><span class="ltx_text" style="font-size:70%;"> Max.</span></td>
</tr>
</tbody>
</table>
</figure>
</section>