1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
import json
import logging
from datetime import datetime
from json.decoder import JSONDecoder

from elasticsearch import Elasticsearch
# Elasticsearch client for the visit-analytics cluster.
# NOTE(review): host and credentials are hard-coded in source — move them to
# config / environment variables before this leaves a dev environment.
es = Elasticsearch([{'host': '39.107.117.232', 'port': 9200}],
                   http_auth=('elastic', 'elastic'),
                   timeout=90000)

# Aggregation query: top 5 articles by document count ("art" terms agg); for
# each article, a total visit count ("art_total") and a per-hour
# date_histogram over `visitTime` ("_time").
sqs = {
    "size": 0,  # no hits needed — aggregations only
    "aggs": {
        "art": {
            "terms": {
                "field": "article.keyword",
                "size": 5
            },
            "aggs": {
                "art_total": {
                    "value_count": {
                        "field": "article.keyword"
                    }
                },
                "_time": {
                    "date_histogram": {
                        "field": "visitTime",
                        "calendar_interval": "hour"
                    }
                }
            }
        }
    }
}

# Run the aggregation against the visit-data index.
_search_result = es.search(index="yj_visit_data2", body=sqs)
# Pretty-printed copy of the raw response; not referenced anywhere below —
# presumably kept for debugging. TODO(review): confirm it can be dropped.
_result_json = json.dumps(_search_result, sort_keys=True, indent=4,
                          separators=(', ', ': '), ensure_ascii=False)

# One bucket per top article (up to 5); each bucket carries its article key
# and the nested "_time" hourly histogram consumed by the loop below.
aggregations = _search_result['aggregations']
art = aggregations['art']
buckets = art['buckets']
def getHour(time):
    """Return the hour-of-day (0-23) parsed from a timestamp string.

    Expects the hour digits at character positions 11-12, as in ISO-like
    timestamps, e.g. "2021-06-01T13:00:00" -> 13.

    Raises:
        ValueError: if characters 11-13 of `time` are not numeric.
    """
    # Plain int() call instead of the original C-style `(int)(...)` cast idiom.
    return int(time[11:13])
def countByMonth(dataList, hourTar):
    """Sum `doc_count` over histogram buckets whose timestamp falls in hour `hourTar`.

    Parameters:
        dataList: date_histogram buckets; each item is a dict with a
                  'key_as_string' timestamp and a 'doc_count'.
        hourTar:  hour of day (0-23) to match.

    Returns:
        int: total visit count across all buckets in that hour.

    NOTE(review): the name says "month" but the function aggregates by *hour*;
    consider renaming once all call sites are known.
    """
    # sum() over a generator replaces the original manual accumulator loop;
    # int() replaces the C-style `(int)(...)` cast.
    return sum(int(data['doc_count'])
               for data in dataList
               if getHour(data['key_as_string']) == hourTar)

# Accumulator for the final report: one {0: article_key, 1: hourly_counts}
# record per top-article bucket, filled by the loop below.
final_list = []
# For each top article, build a 24-slot list of visit counts indexed by hour
# of day, and collect the results into final_list.
for outBucket in buckets:
    innerBuckets = outBucket['_time']['buckets']
    # Typo fixed in log message ("bucker" -> "bucket").
    print("time inner bucket size", len(innerBuckets))
    # Index i holds the total visit count for hour i (0-23); comprehension
    # replaces the original range(0,24) append loop.
    simple_list = [countByMonth(innerBuckets, hour) for hour in range(24)]
    # NOTE(review): a dict with int keys 0/1 mimics a 2-tuple; kept as-is for
    # output compatibility — a (key, counts) tuple would be clearer.
    simple_result = {0: outBucket['key'], 1: simple_list}
    final_list.append(simple_result)
print("final result ----> ", final_list)
|