Skip to content

Commit 3175ea7

Browse files
committed
Refactor.
1 parent 78a3962 commit 3175ea7

File tree

11 files changed

+732
-229
lines changed

11 files changed

+732
-229
lines changed

Pipfile.lock

Lines changed: 105 additions & 70 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/__init__.py renamed to jira_database_etl/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from config import Config
2+
from loguru import logger
23
from .jirafetch import FetchJiraIssues
34
from .transform import TransformData
45
from .db import Database
@@ -9,7 +10,7 @@ def init_script():
910
issues, epics = fetch_jira_issues()
1011
issues, epics = clean_jira_issues(issues, epics)
1112
upload = upload_issues(issues, epics)
12-
print(upload)
13+
logger.info(upload)
1314

1415

1516
def fetch_jira_issues():
@@ -22,7 +23,7 @@ def fetch_jira_issues():
2223

2324
def clean_jira_issues(issues, epics):
2425
"""Clean data and create pandas DataFrame."""
25-
print('Transforming JIRA issues to tabular data...')
26+
logger.info('Transforming JIRA issues to tabular data...')
2627
transform_data = TransformData()
2728
issues_df = transform_data.construct_dataframe(issues)
2829
epics_df = transform_data.construct_dataframe(epics)
@@ -31,7 +32,7 @@ def clean_jira_issues(issues, epics):
3132

3233
def upload_issues(issues, epics):
3334
"""Upload issues table to SQL database."""
34-
print("Preparing database upload...")
35+
logger.info("Preparing database upload...")
3536
db = Database(Config)
3637
epics_upload = db.upload_epics(epics)
3738
issues_upload = db.upload_issues(issues)

jira_database_etl/db.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from loguru import logger
2+
from sqlalchemy import create_engine, text
3+
from sqlalchemy.types import Integer, Text, String
4+
import pandas as pd
5+
6+
7+
class Database:
    """Merge Epic metadata and upload JIRA issues.

    Wraps a SQLAlchemy engine built from the supplied configuration and
    exposes two public operations: :meth:`upload_epics` and
    :meth:`upload_issues`. Both clear the destination table, then write the
    given DataFrame to it and return a human-readable status message.
    """

    def __init__(self, Config):
        """Store connection settings and create the SQLAlchemy engine.

        :param Config: object exposing ``db_uri``, ``db_jira_table`` and
            ``db_epic_table`` attributes.
        """
        self.db_uri = Config.db_uri
        self.db_jira_table = Config.db_jira_table
        self.db_epic_table = Config.db_epic_table
        self.engine = create_engine(self.db_uri, echo=False)

    def upload_epics(self, epics_df):
        """Create SQL table of JIRA epics.

        :param epics_df: DataFrame of epics to write to ``db_epic_table``.
        :returns: status message produced by the upload.
        """
        self.__truncate_table(self.db_epic_table)
        return self.__upload_dataframe(epics_df, self.db_epic_table)

    def upload_issues(self, issues_df):
        """Create SQL table of JIRA issues & JOIN with epic info.

        The epic table is read back from the database, so epics are expected
        to have been uploaded before this is called.

        :param issues_df: DataFrame of issues to write to ``db_jira_table``.
        :returns: status message produced by the upload.
        """
        self.__truncate_table(self.db_jira_table)
        issues_df = self.__merge_epic_metadata(issues_df)
        return self.__upload_dataframe(issues_df, self.db_jira_table)

    def __truncate_table(self, table):
        """Clear existing SQL table.

        :param table: name of the table to truncate. It is interpolated
            directly into the statement, so it must come from trusted
            configuration, never from user input.
        """
        sql = text(f'TRUNCATE TABLE {table}')
        # Run inside an explicit transaction: ``Engine.execute`` no longer
        # exists in SQLAlchemy 2.0, and ``begin()`` commits on success and
        # rolls back on error on every supported version.
        # NOTE(review): presumably this raises if the table does not exist
        # yet (e.g. on a first run) -- confirm against the target database.
        with self.engine.begin() as connection:
            connection.execute(sql)

    def __merge_epic_metadata(self, jira_issues_df):
        """Merge epic metadata from existing SQL table.

        :param jira_issues_df: DataFrame of issues with an ``epic_link``
            column.
        :returns: the issues joined with the epic ``key`` and ``epic_name``
            read from ``db_epic_table``.
        """
        epics_df = pd.read_sql_table(self.db_epic_table, self.engine)
        # Log a summary rather than dumping the whole DataFrame into the log.
        logger.info(
            f'Loaded {len(epics_df)} epics from {self.db_epic_table} table.'
        )
        return self._join_epic_names(jira_issues_df, epics_df)

    @staticmethod
    def _join_epic_names(jira_issues_df, epics_df):
        """Left-join issues to epics on ``epic_link`` == epic ``key``.

        Only the ``key`` and ``epic_name`` columns of ``epics_df`` are used.
        When both frames carry a ``key`` column pandas suffixes them as
        ``key_x`` / ``key_y``; these are renamed so the issue key stays
        ``key`` and the epic key becomes ``epic_key``.

        :param jira_issues_df: DataFrame of issues with an ``epic_link``
            column.
        :param epics_df: DataFrame of epics with ``key`` and ``epic_name``
            columns.
        :returns: a new DataFrame with one row per issue; issues without a
            matching epic keep missing values in the epic columns.
        """
        merged_df = pd.merge(
            jira_issues_df,
            epics_df[['key', 'epic_name']],
            how='left',
            left_on='epic_link',
            right_on='key',
        )
        return merged_df.rename(columns={"key_x": "key", "key_y": "epic_key"})

    def __upload_dataframe(self, issues_df, table):
        """Upload JIRA DataFrame to database.

        :param issues_df: DataFrame to write.
        :param table: name of the destination table.
        :returns: message stating how many rows were written to ``table``.
        """
        # Explicit SQL column types for the columns this pipeline produces.
        column_types = {
            "id": Integer,
            "assignee_name": String(30),
            "epic_link": String(50),
            "issuetype": String(50),
            "key": String(10),
            "priority_name": String(30),
            "priority_rank": Integer,
            "project": String(70),
            "status": String(30),
            "summary": Text,
            "resolution": String(70),
            "story_points": Integer,
            "labels": Text,
            "updated": Integer,
            "created": Integer,
            'sprint_status': String(255),
            'sprint_name': Text,
            'sprint_goal': Text,
            "epic_name": String(100),
        }
        issues_df.to_sql(
            table,
            self.engine,
            if_exists='replace',
            chunksize=500,
            index=False,
            dtype=column_types,
        )
        return f'Uploaded {len(issues_df)} rows to {table} table.'

src/jirafetch.py renamed to jira_database_etl/jirafetch.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import math
22
import requests
3+
from loguru import logger
34

45

56
class FetchJiraIssues:
@@ -17,16 +18,20 @@ def __init__(self, config):
1718

1819
def get_issues(self):
1920
"""Fetch JIRA issues which are not Epics."""
20-
print('Fetching issues from JIRA...')
21-
issues = self.__fetch_all_results(self.jira_issues_jql,
22-
self.jira_issues_fields)
21+
logger.info('Fetching issues from JIRA...')
22+
issues = self.__fetch_all_results(
23+
self.jira_issues_jql,
24+
self.jira_issues_fields
25+
)
2326
return issues
2427

2528
def get_epics(self):
2629
"""Fetch JIRA issues which are Epics."""
27-
print('Fetching epics from JIRA...')
28-
issues = self.__fetch_all_results(self.jira_epics_jql,
29-
self.jira_epics_fields)
30+
logger.info('Fetching epics from JIRA...')
31+
issues = self.__fetch_all_results(
32+
self.jira_epics_jql,
33+
self.jira_epics_fields
34+
)
3035
return issues
3136

3237
def __get_total_number_of_issues(self, jql):
@@ -35,15 +40,16 @@ def __get_total_number_of_issues(self, jql):
3540
"jql": jql,
3641
"maxResults": 0,
3742
"startAt": 0}
38-
req = requests.get(self.jira_endpoint,
39-
headers={"Accept": "application/json"},
40-
params=params,
41-
auth=(self.jira_username, self.jira_api_key))
42-
print(req.json())
43+
req = requests.get(
44+
self.jira_endpoint,
45+
headers={"Accept": "application/json"},
46+
params=params,
47+
auth=(self.jira_username, self.jira_api_key)
48+
)
4349
total_results = req.json().get('total', None)
4450
if total_results:
4551
return total_results
46-
print('Could not find any issues!')
52+
logger.info('Could not find any issues!')
4753

4854
def __fetch_all_results(self, jql, fields):
4955
"""Retrieve all JIRA issues."""
@@ -58,16 +64,18 @@ def fetch_single_page(jql, fields):
5864
"startAt": len(issue_arr),
5965
"validateQuery": "warn",
6066
"fields": fields}
61-
req = requests.get(self.jira_endpoint,
62-
headers={"Accept": "application/json"},
63-
params=params,
64-
auth=(self.jira_username, self.jira_api_key))
67+
req = requests.get(
68+
self.jira_endpoint,
69+
headers={"Accept": "application/json"},
70+
params=params,
71+
auth=(self.jira_username, self.jira_api_key)
72+
)
6573
response = req.json()
6674
issues = response['issues']
6775
issues_so_far = len(issue_arr) + self.results_per_page
6876
if issues_so_far > num_issues:
6977
issues_so_far = num_issues
70-
print(f'Fetched {issues_so_far} out of {num_issues} total issues.')
78+
logger.info(f'Fetched {issues_so_far} out of {num_issues} total issues.')
7179
issue_arr.extend(issues)
7280
# Check if additional pages of results exist.
7381
count = math.ceil(num_issues/self.results_per_page)

src/transform.py renamed to jira_database_etl/transform.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ def construct_dataframe(self, issues_json):
1414
jira_issues_df = json_normalize(issue_json_list)
1515
return jira_issues_df
1616

17-
def dict_to_json_string(self, issue_dict):
17+
@staticmethod
18+
def dict_to_json_string(issue_dict):
1819
"""Convert dict to JSON to string."""
1920
issue_json_string = json.dumps(issue_dict)
2021
issue_json = json.loads(issue_json_string)
@@ -53,7 +54,8 @@ def make_issue_body(self, issue):
5354
body['labels'] = self.parse_label_data(issue['fields']['labels'])
5455
return body
5556

56-
def parse_sprint_data(self, sprint):
57+
@staticmethod
58+
def parse_sprint_data(sprint):
5759
"""Parse raw sprint string."""
5860
sprint_body = {
5961
'sprint_status': sprint[0].split('state=')[1].split(',', 1)[0],
@@ -62,7 +64,8 @@ def parse_sprint_data(self, sprint):
6264
}
6365
return sprint_body
6466

65-
def parse_label_data(self, labels):
67+
@staticmethod
68+
def parse_label_data(labels):
6669
"""Parse list of labels."""
6770
labels = ', '.join(labels)
6871
return labels

main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Script entry point."""
2-
from src import main
2+
from jira_database_etl import init_script
33

44
if __name__ == '__main__':
5-
main()
5+
init_script()

0 commit comments

Comments
 (0)