-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
63 lines (57 loc) · 2.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os.path
import pandas as pd
import json
import requests
import scope_file as project_scope
# Project configuration, aliased from scope_file for brevity.
# Value passed (stringified) as the `mailto` request parameter.
nice_list = project_scope.NICE_LIST
# Mapping iterated by the harvesting loop: key -> output file prefix,
# value -> identifier used in the institution-lineage filter.
institutions = project_scope.institution_dict
# Iterable of publication years to harvest.
era = project_scope.era
def _flatten_work(work):
    """Return a copy of one work record with its nested fields reduced.

    * ``topics`` becomes a single dict with the keys ``id``,
      ``display_name``, ``field`` and ``score`` (empty dict when the work
      has no topics).
    * ``authorships`` becomes a flat list of ``{"id", "display_name"}``
      dicts, one per institution of every authorship.
    * Every other key is copied through unchanged.
    """
    flat = {}
    for field, content in work.items():
        if field == "topics":
            # NOTE(review): every topic is stored under the same four keys,
            # so only the LAST topic in the list survives. This is kept
            # as-is to leave the CSV format untouched, but it is presumably
            # not the intent (if the API orders topics by descending score,
            # this keeps the weakest match) -- confirm and fix deliberately.
            topic = {}
            for entry in content:
                topic = {
                    "id": entry["id"],
                    "display_name": entry["display_name"],
                    "field": entry["field"]["display_name"],
                    "score": entry["score"],
                }
            flat[field] = topic
        elif field == "authorships":
            flat[field] = [
                {"id": inst["id"], "display_name": inst["display_name"]}
                for authorship in content
                for inst in authorship["institutions"]
            ]
        else:
            flat[field] = content
    return flat


def query_year(my_year, my_value, *, timeout=60):
    """Fetch one page of article works from OpenAlex and store the results.

    The request filters on publication year ``my_year``, institution
    lineage ``my_value`` and work type ``types/article``. The page to fetch
    is read from the module-level ``page_number``; each flattened result is
    appended to the module-level ``dataSet`` list.

    Args:
        my_year: Publication year to filter on.
        my_value: Institution identifier used in the lineage filter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the harvest indefinitely.

    Returns:
        The ``meta`` object of the response (the caller reads ``page``,
        ``per_page`` and ``count`` from it).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = {
        'filter': f'publication_year:{my_year},authorships.institutions.lineage:{my_value},type:types/article',
        'select': 'id,ids,doi,topics,publication_year,fwci,type,cited_by_count,cited_by_percentile_year,'
                  'authorships,open_access,sustainable_development_goals,counts_by_year',
        'page': str(page_number),
        'mailto': str(nice_list),
    }
    api_url = "https://api.openalex.org/works"
    response = requests.get(api_url, params=params, timeout=timeout)
    # Fail loudly on an error status instead of a KeyError on 'results' below.
    response.raise_for_status()
    payload = response.json()  # parse the body once, not once per access
    dataSet.extend(_flatten_work(work) for work in payload['results'])
    return payload['meta']
# Harvest every (institution, year) combination and write one
# semicolon-separated CSV per pair into ./output.
output_dir = "output"
# Create the target directory up front so to_csv cannot fail on a fresh
# checkout; exist_ok keeps re-runs harmless.
os.makedirs(output_dir, exist_ok=True)
for key, value in institutions.items():
    for year in era:
        # query_year reads page_number and appends to dataSet through module
        # globals, so both must be (re)bound here before each harvest.
        dataSet = []
        page_number = 1
        # NOTE(review): OpenAlex documents a cap on page-based paging
        # (first 10,000 results); confirm no institution/year exceeds it,
        # otherwise cursor paging is needed.
        while True:
            this_json = query_year(year, value)
            # Stop once the pages fetched so far cover the reported total.
            if int(this_json['page']) * int(this_json['per_page']) >= int(this_json['count']):
                break
            page_number += 1
        datafile = pd.DataFrame.from_records(dataSet)
        # os.path.join keeps the path portable; the previous hard-coded
        # ".\\output\\" prefix only resolved to a directory on Windows.
        datafile.to_csv(os.path.join(output_dir, f"{key}_{year}.csv"), sep=";")