-
Notifications
You must be signed in to change notification settings - Fork 9
/
query-index.yaml
93 lines (85 loc) · 2.04 KB
/
query-index.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Query index definitions: one entry under `indices` per site variant.
# Each index lists the page paths to include/exclude, the spreadsheet it
# writes to (`target`), and the properties extracted from every page.
# NOTE(review): the layout matches the AEM Edge Delivery (Helix) indexing
# config format - confirm against the service that consumes this file.
version: 1

indices:
  # Base definition. Anchored as `default` so every other index can reuse
  # it through a merge key.
  english: &default
    include:
      - /en/publish/*/*/*/*
    exclude:
      - '**/Document.*'
    target: /en/query-index.xlsx
    # Each property pairs a CSS selector (`select`) with an expression that
    # is evaluated against the selected element (`el`).
    properties:
      author:
        select: head > meta[name="author"]
        value: |
          attribute(el, 'content')
      title:
        select: main h1:first-of-type
        value: |
          textContent(el)
      date:
        select: head > meta[name="publication-date"]
        # 'MM-DD-YYYY' is the format string handed to dateValue.
        value: |
          dateValue(attribute(el, 'content'), 'MM-DD-YYYY')
      image:
        select: head > meta[property="og:image"]
        # The regex's only capture group is the path after the https host.
        value: |
          match(attribute(el, 'content'), 'https:\/\/[^/]+(\/.*)')
      imageAlt:
        select: head > meta[property="og:image:alt"]
        value: |
          attribute(el, 'content')
      description:
        select: head > meta[name="description"]
        value: |
          attribute(el, 'content')
      tags:
        select: head > meta[property="article:tag"]
        # Uses `values` (plural), unlike the other properties - presumably
        # one entry per matching meta tag; confirm with the indexer docs.
        values: |
          attribute(el, 'content')
      robots:
        select: head > meta[name="robots"]
        value: |
          attribute(el, 'content')
      lastModified:
        # The expression reads the `last-modified` response header rather
        # than `el`; `none` presumably means no element is selected.
        select: none
        value: |
          parseTimestamp(headers['last-modified'], 'ddd, DD MMM YYYY hh:mm:ss GMT')

  # The indices below merge in `default`. The merge is shallow: the
  # `include` and `target` keys written here replace the merged-in ones,
  # while `exclude` and `properties` are inherited unchanged.

  # Same include pattern as `english`, but written to the root-level
  # spreadsheet instead of the one under /en/.
  no-en:
    <<: *default
    include:
      - /en/publish/*/*/*/*
    target: /query-index.xlsx

  brazilian:
    <<: *default
    include:
      - /br/publish/*/*/*/*
    target: /br/query-index.xlsx

  german:
    <<: *default
    include:
      - /de/publish/*/*/*/*
    target: /de/query-index.xlsx

  spanish:
    <<: *default
    include:
      - /es/publish/*/*/*/*
    target: /es/query-index.xlsx

  french:
    <<: *default
    include:
      - /fr/publish/*/*/*/*
    target: /fr/query-index.xlsx

  italian:
    <<: *default
    include:
      - /it/publish/*/*/*/*
    target: /it/query-index.xlsx

  japanese:
    <<: *default
    include:
      - /jp/publish/*/*/*/*
    target: /jp/query-index.xlsx

  korean:
    <<: *default
    include:
      - /ko/publish/*/*/*/*
    target: /ko/query-index.xlsx