Skip to content

Commit

Permalink
Fix Conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
yasushi00 committed May 5, 2020
2 parents bbf2d6f + 6450413 commit 8b4ca70
Show file tree
Hide file tree
Showing 10 changed files with 773 additions and 31 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,29 @@ jq は出力を見やすく整形するときに利用
## 使い方

### 奈良県/奈良市のニュースjsonの作成
# 奈良県:http://www.pref.nara.jpから取得
# 奈良市:https://www.city.nara.lg.jp
各HPをスクレイピングして, news.json, news_naracity.jsonを作成する.
- 奈良県:http://www.pref.nara.jpから取得
- 奈良市:https://www.city.nara.lg.jpから取得

python3 scraping_naraNews.py [-h] [-pref json file] [-city json file]<br>
python3 scraping_naraNews.py [-h] [-p pref json file] [-c city json file]<br>
- pref json file : default ./data/news.json
- city json file : default ./data/news_naracity.json

### 奈良県内市町村jsonの作成
県内市町村の新型コロナ情報へのURLリストをmunicipalities-data.jsonに変換する.

python3 convert_municipalities.py [-h] [-i excel file] [-o json file]<br>
- excel file : default ./data/municipalities.xlsx<br>
- json file : default ./data/municipalities-data.json

### 奈良県版感染情報jsonの作成
奈良県のオープンデータをdata.json, sickbeds.jsonに変換する.

奈良市版をベースに修正予定
python3 convert_patient.py [-h] [-l list excel file] [-s summary excel file] [-d data json file] [-b beds json file]
- list excel file : default ./data/奈良県_01新型コロナウイルス感染者_患者リスト.xlsx
- summary excel file : default ./data/奈良県_02新型コロナウイルス感染者_患者集計表.xlsx
- data json file : default ./data/data.json
- beds json file : default ./data/sickbeds_summary.json

### 奈良市版感染情報jsonの作成

Expand Down
32 changes: 9 additions & 23 deletions convert_naracity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,24 @@
#from processing.querents import parse_querents

#(inspections, inspections_summary_data, inspections_summary_labels), total_count = parse_inspection_per_date()
list_update, patients_list, patients_count, stayed_count, discharge_count, death_count = parse_nara_patients_list()
daily_update, patients_summary, inspections_list, querents_list, inspections_total, stayed_count, discharge_count, death_count = parse_nara_dailystatus()
patients_ldate, patients_list, patients_count, stayed_count, discharge_count, death_count = parse_nara_patients_list()
patients_date, patients_summary, inspections_date, inspections_list, querents_date, querents_list, inspections_total, stayed_count, discharge_count, death_count = parse_nara_dailystatus()

#死亡者を除く
discharge_count-=death_count

#sorted_values = sorted(patients_and_no_symptoms_summary_data.values(), key=lambda d: d["day"])
#patients_and_no_symptoms_summary_data_patients = []
#patients_and_no_symptoms_summary_data_no_symptoms = []
#patients_and_no_symptoms_summary_labels = []
#for d in sorted_values:
# patients_and_no_symptoms_summary_data_patients.append(d["patients"])
# patients_and_no_symptoms_summary_data_no_symptoms.append(d["no_symptoms"])
# patients_and_no_symptoms_summary_labels.append(d["labels"])

strupdate = datetime.now().strftime('%Y/%m/%d %H:%M')
list_update+= timedelta(hours=18)
daily_update+= timedelta(hours=18) # 日付しかないのでその日の18時に修正
listdate = list_update.strftime('%Y/%m/%d %H:%M')
dailydate = daily_update.strftime('%Y/%m/%d %H:%M')
# print( patients_date )

# data.json 雛形
data = {
# 陽性患者
"patients": {
"date": listdate,
#"date": datetime.now().strftime('%Y/%m/%d %H:%M'),
"date": patients_ldate,
"data": patients_list
},

"main_summary": {
"date": dailydate,
"date": patients_date,
"attr": "検査実施人数",
"value": inspections_total,
"children": [
Expand Down Expand Up @@ -82,20 +68,20 @@
},
# 患者数
"patients_summary": {
"date": dailydate,
"date": patients_date,
"data": patients_summary
},
# 検査実施数
"inspections_summary": {
"date": dailydate,
"date": inspections_date,
"data": inspections_list
},
# 相談件数
"querents": {
"date": dailydate,
"date": querents_date,
"data": querents_list
},
"lastUpdate": strupdate
"lastUpdate": datetime.now().strftime('%Y/%m/%d %H:%M')
}

print(json.dumps(data, ensure_ascii=False ) )
Expand Down
272 changes: 272 additions & 0 deletions convert_narapref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
#!/usr/bin/env python3
import os
import numpy as np
import pandas as pd
import sys
import datetime
from pathlib import Path
import argparse

# Make the directory that contains this script importable.
# `__file__` must be the module variable: the quoted string '__file__' is just
# a relative file name, which resolves against the current working directory.
sys.path.append(str(Path(__file__).resolve().parent))

# Template, File, Directory
# Directory joined with DEST1_FILE to form the default --data output path.
DATA_DIR = './data'

# Default Google Spreadsheet id (--gid) and default sheet names
# (--list uses SHEET1_NAME, --summary uses SHEET2_NAME).
SRC_SHEETID = "1C07ojkwER8BiAjLBxlzJkfvgM5jxUCLrdtI7wtctTIY"
SHEET1_NAME = '01.陽性患者の属性'
# Earlier summary sheet name, left here commented out.
#SHEET2_NAME = '入院患者の状況'
SHEET2_NAME = '03.陽性者状況'

# Output file names. DEST2_FILE is not referenced by the argument parser
# visible in this file.
DEST1_FILE = 'data.json'
DEST2_FILE = 'sickbeds_summary.json'

# Prefix strings written in front of each emitted JSON line; the writers index
# this list with the nesting depth of the line (TAB[1] .. TAB[8]).
# NOTE(review): every non-empty entry is a single space here; presumably the
# widths originally grew with depth — confirm against the repository copy.
TAB = ['', ' ', ' ', ' ', ' ', ' ', ' ',
' ', ' ', ' ']

def load_patient_list( dataUri, sheetName ):
    """Load the patient-list sheet and return ``(last_update, frame)``.

    Args:
        dataUri: Path or URL handed to ``pandas.read_excel``.
        sheetName: Name of the sheet to read.

    Returns:
        A tuple of the '公表_年月日' value of the last remaining row and the
        cleaned DataFrame.

    Raises:
        IndexError: If no rows remain after the two leading rows are dropped.
    """
    # Read without a header so the column names can be taken from row 0.
    raw = pd.read_excel(dataUri, sheet_name=sheetName, header=None)
    raw.columns = raw.iloc[0]

    # Rows labelled 0 and 1 are dropped; the rest are treated as records.
    records = raw.drop(index=range(0, 2))

    # Columns the JSON output never reads.
    unused_columns = ['全国地方公共団体コード', '都道府県名', '患者_渡航歴の有無フラグ', '患者_退院済フラグ']
    records = records.drop(columns=unused_columns)

    # Missing cells in these columns become empty strings.
    for column in ('備考', '発症_年月日', '患者_居住地',
                   '患者_職業', '患者_状態', '患者_症状'):
        records[column] = records[column].fillna('')

    # The announcement date of the final row is reported as the last update.
    last_update = records.iloc[-1]['公表_年月日']
    return last_update, records

def load_patient_summary( dataUri, sheetName ):
    """Load the daily summary sheet and return ``(last_update, frame)``.

    Args:
        dataUri: Path or URL handed to ``pandas.read_excel``.
        sheetName: Name of the sheet to read.

    Returns:
        A tuple of the '発表日' value of the last dated row and the DataFrame
        restricted to rows that have a '発表日' value.

    Raises:
        IndexError: If no dated rows remain.
    """
    # Read without a header; the column names are taken from row 1.
    sheet = pd.read_excel(dataUri, sheet_name=sheetName, header=None)
    sheet.columns = sheet.iloc[1]
    sheet = sheet.drop(index=range(0, 2))

    # Blank out missing dates, then keep only the rows that carry a date.
    sheet['発表日'] = sheet['発表日'].fillna('')
    has_date = sheet['発表日'] != ''
    dated_rows = sheet[has_date]

    newest = dated_rows.iloc[-1]
    return newest['発表日'], dated_rows

def output_patients_list(f, last_update, patients):
    """Write the ``"patients"`` JSON member (date plus one object per row).

    Free-text cells are emitted through ``json.dumps`` so that quotes,
    backslashes and control characters inside a cell cannot break the
    generated JSON.  Cells without such characters are written exactly as
    before.

    Args:
        f: Writable text file object.
        last_update: Ignored; the date written is always the '公表_年月日'
            of the last row of ``patients``.  Kept for call compatibility.
        patients: DataFrame with the columns 'No', '公表_年月日',
            '患者_居住地', '患者_年代', '患者_性別', '患者_職業', '患者_状態',
            '患者_症状', '発症_年月日' and '備考'.

    Raises:
        IndexError: If ``patients`` has no rows.
    """
    # Local import so this function does not depend on the module import list.
    import json

    def quoted(value):
        # Same text the old '"{}"'.format(value) produced, but JSON-escaped.
        return json.dumps('{}'.format(value), ensure_ascii=False)

    last_update = patients.iloc[-1]['公表_年月日']

    f.write(TAB[1] + '"patients":{\n')
    f.write(TAB[2] + '"date": "{}",\n'.format(last_update.strftime('%Y/%m/%d')))
    f.write(TAB[2] + '"data": [\n')

    # (JSON key, source column) pairs written as strings, in output order.
    text_fields = (
        ('住居地', '患者_居住地'),
        ('年代', '患者_年代'),
        ('性別', '患者_性別'),
        ('職業', '患者_職業'),
        ('状態', '患者_状態'),
        ('症状', '患者_症状'),
        ('発症日', '発症_年月日'),
    )

    total = len(patients.index)
    for position, (_, patient) in enumerate(patients.iterrows(), start=1):
        f.write(TAB[3] + '{\n')
        f.write(TAB[4] + '"No": {},\n'.format(patient['No']))
        f.write(TAB[4] + '"発表日": "{}",\n'.format(str(patient['公表_年月日'].date()) + 'T08:00:00.000Z'))
        for key, column in text_fields:
            f.write(TAB[4] + '"{}": {},\n'.format(key, quoted(patient[column])))
        f.write(TAB[4] + '"備考": {}\n'.format(quoted(patient['備考'])))
        # No trailing comma after the final array element.
        f.write(TAB[3] + ('}\n' if position == total else '},\n'))

    f.write(TAB[2] + ']\n')
    f.write(TAB[1] + '},\n')

def output_patientslist_summary(f, last_update, patients):
    """Write ``"patients_summary"`` with per-day counts derived from the list.

    One entry is written for every day from 2020-01-24 through
    ``last_update``; the count is the number of rows of ``patients`` whose
    '公表_年月日' equals that day.

    Args:
        f: Writable text file object.
        last_update: Datetime-like value of the last day to report.
        patients: DataFrame with a '公表_年月日' column.
    """
    first_day = datetime.datetime(2020, 1, 24, 0, 0, 0)
    # Adding one day makes the range include last_update itself.
    day_count = ((last_update + datetime.timedelta(days=1)) - first_day).days

    f.write(TAB[1] + '"patients_summary":{\n')
    f.write(TAB[2] + '"date": "{}",\n'.format(last_update.strftime('%Y/%m/%d')))
    f.write(TAB[2] + '"data": [\n')

    final_offset = day_count - 1
    for offset in range(day_count):
        day = first_day + datetime.timedelta(days=offset)
        announced_that_day = patients[patients['公表_年月日'] == day]
        subtotal = len(announced_that_day)

        f.write(TAB[3] + '{\n')
        f.write(TAB[4] + '"日付": "{}",\n'.format(str(day.date()) + 'T08:00:00.000Z'))
        f.write(TAB[4] + '"小計": {}\n'.format(subtotal))
        # No trailing comma after the final array element.
        closing = '}\n' if offset == final_offset else '},\n'
        f.write(TAB[3] + closing)

    f.write(TAB[2] + ']\n')
    f.write(TAB[1] + '},\n')

def output_patients_summary(f, last_update, summary):
    """Write ``"patients_summary"`` from a sheet that already has daily counts.

    Alternative to ``output_patientslist_summary`` for when the daily data
    carries the positive count itself.  One entry is written per day from
    2020-01-24 through ``last_update``; a day contributes its '陽性確認_件数'
    value only when exactly one row of ``summary`` has that '公表_年月日',
    otherwise 0 is written.

    Args:
        f: Writable text file object.
        last_update: Datetime-like value of the last day to report.
        summary: DataFrame with '公表_年月日' and '陽性確認_件数' columns.
    """
    f.write(TAB[1] + '"patients_summary":{\n')
    f.write(TAB[2] + '"date": "{}",\n'.format(last_update.strftime('%Y/%m/%d')))
    f.write(TAB[2] + '"data": [\n')

    first_day = datetime.datetime(2020, 1, 24, 0, 0, 0)
    # Adding one day makes the range include last_update itself.
    day_count = ((last_update + datetime.timedelta(days=1)) - first_day).days
    final_offset = day_count - 1

    for offset in range(day_count):
        day = first_day + datetime.timedelta(days=offset)
        announced = summary['公表_年月日']
        matching_labels = list(announced[announced == day].index)
        subtotal = 0
        if len(matching_labels) == 1:
            subtotal = summary['陽性確認_件数'][matching_labels[0]]

        f.write(TAB[3] + '{\n')
        f.write(TAB[4] + '"日付": "{}",\n'.format(str(day.date()) + 'T08:00:00.000Z'))
        f.write(TAB[4] + '"小計": {}\n'.format(subtotal))
        # No trailing comma after the final array element.
        closing = '}\n' if offset == final_offset else '},\n'
        f.write(TAB[3] + closing)

    f.write(TAB[2] + ']\n')
    f.write(TAB[1] + '},\n')

def output_main_summary(f, last_update, summary):
    """Write the ``"main_summary"`` JSON member from the latest summary row.

    The values come from the last row of ``summary`` (columns '感染者数累計',
    '現在感染者数', '入院中', '宿泊療養', '退院等累計', '死亡').  The top-level
    "検査実施人数" value is written as the constant 0.

    Args:
        f: Writable text file object.
        last_update: Datetime-like value formatted into the "date" field.
        summary: DataFrame holding the daily summary rows.

    Raises:
        IndexError: If ``summary`` has no rows.
    """
    latest = summary.iloc[-1]

    def emit(depth, text):
        # One output line: depth prefix, text, newline.
        f.write(TAB[depth] + text + '\n')

    emit(1, '"main_summary":{')
    emit(2, '"date": "{}",'.format(last_update.strftime('%Y/%m/%d')))
    emit(2, '"attr": "検査実施人数",')
    emit(2, '"value": 0,')
    emit(2, '"children": [')

    # Confirmed cases.
    emit(3, '{')
    emit(4, '"attr": "陽性患者数",')
    emit(4, '"value": {},'.format(latest['感染者数累計']))
    emit(4, '"children": [')

    # Currently infected, split into two child entries.
    emit(5, '{')
    emit(6, '"attr": "入院患者数",')
    emit(6, '"value": {},'.format(latest['現在感染者数']))
    emit(6, '"children": [')
    emit(7, '{')
    emit(8, '"attr": "症状のある方",')
    emit(8, '"value": {}'.format(latest['入院中']))
    emit(7, '},')
    emit(7, '{')
    emit(8, '"attr": "症状のない方",')
    emit(8, '"value": {}'.format(latest['宿泊療養']))
    emit(7, '}')
    emit(6, ']')
    emit(5, '},')

    # Discharged.
    emit(5, '{')
    emit(6, '"attr": "退院した方",')
    emit(6, '"value": {}'.format(latest['退院等累計']))
    emit(5, '},')

    # Deceased.
    emit(5, '{')
    emit(6, '"attr": "亡くなられた方",')
    emit(6, '"value": {}'.format(latest['死亡']))
    emit(5, '}')

    emit(4, ']')
    emit(3, '}')
    emit(2, ']')
    emit(1, '},')

def output_sickbeds_summary( f, last_update, summary):
    """Write the ``"sickbeds_summary"`` JSON member from the latest row.

    Args:
        f: Writable text file object.
        last_update: Datetime-like value formatted into the "date" field.
        summary: DataFrame whose last row supplies '入院中' and '残り病床数'.

    Raises:
        IndexError: If ``summary`` has no rows.
    """
    latest = summary.iloc[-1]
    hospitalized = '"入院患者数": {},\n'.format(latest['入院中'])
    beds_left = '"残り病床数": {}\n'.format(latest['残り病床数'])
    stamp = '"date": "{}"\n'.format(last_update.strftime('%Y/%m/%d'))

    f.write(TAB[1] + '"sickbeds_summary":{\n')
    f.write(TAB[2] + '"data": {\n')
    f.write(TAB[3] + hospitalized)
    f.write(TAB[3] + beds_left)
    f.write(TAB[2] + '},\n')
    f.write(TAB[2] + stamp)
    f.write(TAB[1] + '},\n')

def output_data_json(fname, list_last_update, df_list, summary_last_update, df_summary):
    """Write the complete data.json document to ``fname``.

    The file is opened in a ``with`` block so the handle is closed even when
    one of the section writers raises (for example on a missing column).

    Args:
        fname: Output path; the file is overwritten and encoded as UTF-8.
        list_last_update: Last-update value of the patient list.
        df_list: Patient-list DataFrame.
        summary_last_update: Last-update value of the daily summary.
        df_summary: Daily-summary DataFrame.
    """
    with open(fname, 'w', encoding='utf_8') as fileobj:
        fileobj.write('{\n')

        # Sections appear in the file in this order.
        output_patients_list(fileobj, list_last_update, df_list)
        # Daily counts are derived from the patient list;
        # output_patients_summary(fileobj, summary_last_update, df_summary)
        # is the alternative when the summary sheet carries the counts.
        output_patientslist_summary(fileobj, summary_last_update, df_list)
        output_main_summary(fileobj, summary_last_update, df_summary)
        output_sickbeds_summary(fileobj, summary_last_update, df_summary)

        # Timestamp of this conversion run (local time).
        fileobj.write(TAB[1] + '"lastUpdate": "{}"\n'.format( datetime.datetime.now().strftime('%Y/%m/%d %H:%M')))

        fileobj.write('}\n')

def output_sickbeds_json(fname, last_update, summary):
    """Write a standalone sick-beds JSON file to ``fname``.

    The file is opened in a ``with`` block so the handle is closed even when
    a column lookup raises.  The only call to this function in ``main`` is
    commented out.

    Args:
        fname: Output path; the file is overwritten and encoded as UTF-8.
        last_update: Datetime-like value formatted into "last_update".
        summary: DataFrame whose last row supplies '入院者数' and
            '感染症対応病床数'.

    Raises:
        IndexError: If ``summary`` has no rows.
    """
    last_data = summary.iloc[-1]
    with open(fname, 'w', encoding='utf_8') as fileobj:
        fileobj.write('{\n')
        fileobj.write(TAB[1] + '"data": {\n')
        fileobj.write(TAB[2] + '"入院患者数": {},\n'.format(last_data['入院者数']))
        # Remaining beds = beds available minus beds in use.
        fileobj.write(TAB[2] + '"残り病床数": {}\n'.format(last_data['感染症対応病床数'] - last_data['入院者数']))
        fileobj.write(TAB[1] + '},\n')
        fileobj.write(TAB[1] + '"last_update": "{}"\n'.format(last_update.strftime('%Y/%m/%d')))
        fileobj.write('}\n')

def main(args):
    """Load both sheets of the spreadsheet and write data.json.

    Args:
        args: Parsed command-line namespace providing ``gid`` (spreadsheet
            id), ``list`` and ``summary`` (sheet names) and ``data`` (output
            path).
    """
    pd.set_option('display.max_columns', 20)

    # Both sheets come from the same xlsx export of the spreadsheet.
    export_url = "https://docs.google.com/spreadsheets/d/{0}/export?format=xlsx&id={0}".format( args.gid )

    # Patient list.
    list_last_update, df_list = load_patient_list(export_url, args.list)

    # Daily summary.
    summary_last_update, df_summary = load_patient_summary(export_url, args.summary)

    # Only data.json is produced; the separate sick-beds file
    # (output_sickbeds_json with args.beds) is disabled, and no "beds" option
    # is defined by the argument parser in this file.
    output_data_json(
        args.data,
        list_last_update,
        df_list,
        summary_last_update,
        df_summary,
    )

if __name__ == '__main__':
    # Command-line options: (short flag, long flag, help text, default).
    option_table = (
        ('-i', '--gid', 'Google Spreadsheet Id', SRC_SHEETID),
        ('-l', '--list', 'Patient List Sheet', SHEET1_NAME),
        ('-s', '--summary', 'Patient Summary Sheet', SHEET2_NAME),
        ('-d', '--data', 'Data file', os.path.join(DATA_DIR, DEST1_FILE)),
    )
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, help_, fallback in option_table:
        parser.add_argument(short_flag, long_flag, help=help_, default=fallback)
    args = parser.parse_args()
    main( args )

#SHEET2_NAME = '入院患者の状況'

Loading

0 comments on commit 8b4ca70

Please sign in to comment.