import time
import os
import json
time_start_notebook = time.time()
import numpy as np
import pandas as pd
# Jupyter notebook settings for pandas
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', 100)
from datetime import datetime
from dateutil.relativedelta import relativedelta
# last_year = datetime.now() - relativedelta(years=1)
from sqlalchemy import create_engine
# configs
data = json.load(open(os.path.expanduser('~/.config/postgres/credentials.json')))
pw,port = data['password'], data['port'] # port is 5432
dbname = 'wbhospital' # open pdadmin4 and look if this database exists.
%load_ext sql
%sql postgres://postgres:$pw@localhost:$port/$dbname
%%sql
select table_schema,table_name
from information_schema.tables
where table_schema not in ('pg_catalog', 'information_schema')
and table_schema not like 'pg_toast%'
order by table_schema, table_name
* postgres://postgres:***@localhost:5432/wbhospital 15 rows affected.
table_schema | table_name |
---|---|
public | affiliated_with |
public | appointment |
public | block |
public | department |
public | medication |
public | nurse |
public | on_call |
public | patient |
public | physician |
public | prescribes |
public | procedure |
public | room |
public | stay |
public | trained_in |
public | undergoes |
tables = _
print(type(tables))
df_tables = pd.DataFrame(tables, columns=['table_schema','table_name'])
<class 'sql.run.ResultSet'>
df_tables.columns
Index(['table_schema', 'table_name'], dtype='object')
x = %sql select column_name, data_type, character_maximum_length from INFORMATION_SCHEMA.COLUMNS where table_name = 'appointment';
dfx = x.DataFrame()
dfx
* postgres://postgres:***@localhost:5432/wbhospital 7 rows affected.
column_name | data_type | character_maximum_length | |
---|---|---|---|
0 | appointmentid | integer | None |
1 | patient | integer | None |
2 | prepnurse | integer | None |
3 | physician | integer | None |
4 | start | date | None |
5 | End | date | None |
6 | examinationroom | text | None |
'**' + dfx['column_name'] + '**'
# I don't see any whitespaces around the column names of this table.
0 **appointmentid** 1 **patient** 2 **prepnurse** 3 **physician** 4 **start** 5 **End** 6 **examinationroom** Name: column_name, dtype: object
engine = create_engine(f'postgresql://postgres:{pw}@localhost:{port}/{dbname}')
con = engine.connect()
for t in df_tables['table_name']:
print(t)
display(pd.read_sql(f'select * from {t} limit 1',con))
affiliated_with
physician | department | primaryaffiliation | |
---|---|---|---|
0 | 1 | 1 | True |
appointment
appointmentid | patient | prepnurse | physician | start | End | examinationroom | |
---|---|---|---|---|---|---|---|
0 | 13216584 | 100000001 | 101 | 1 | 2008-04-24 | 2008-04-24 | A |
block
floor | code | |
---|---|---|
0 | 1 | 1 |
department
departmentid | name | head | |
---|---|---|---|
0 | 1 | General Medicine | 4 |
medication
code | name | brand | description | |
---|---|---|---|---|
0 | 1 | Procrastin-X | X | N/A |
nurse
employeeid | name | position | registered | ssn | |
---|---|---|---|---|---|
0 | 101 | Carla Espinosa | Head Nurse | True | 111111110 |
on_call
nurse | blockfloor | blockcode | start | End | |
---|---|---|---|---|---|
0 | 101 | 1 | 1 | 2008-11-04 | 2008-11-04 |
patient
ssn | name | address | phone | insuranceid | pcp | |
---|---|---|---|---|---|---|
0 | 100000001 | John Smith | 42 Foobar Lane | 555-0256 | 68476213 | 1 |
physician
employeeid | name | position | ssn | |
---|---|---|---|---|
0 | 1 | John Dorian | Staff Internist | 111111111 |
prescribes
physician | patient | medication | date | appointment | dose | |
---|---|---|---|---|---|---|
0 | 1 | 100000001 | 1 | 2008-04-24 | 13216584 | 5 |
procedure
code | name | cost | |
---|---|---|---|
0 | 1 | Reverse Rhinopodoplasty | 1500.0 |
room
number | type | blockfloor | blockcode | unavailable | |
---|---|---|---|---|---|
0 | 101 | Single | 1 | 1 | False |
stay
stayid | patient | room | start | End | |
---|---|---|---|---|---|
0 | 3215 | 100000001 | 111 | 2008-05-01 | 2008-05-04 |
trained_in
physician | treatment | certificationdate | certificationexpires | |
---|---|---|---|---|
0 | 3 | 1 | 2008-01-01 | 2008-12-31 |
undergoes
patient | procedure | stay | date | physician | assistingnurse | |
---|---|---|---|---|---|---|
0 | 100000001 | 6 | 3215 | 2008-05-02 | 3 | 101 |
q = """
SELECT Name
FROM Physician
WHERE EmployeeID IN
(
SELECT Physician FROM Undergoes U WHERE NOT EXISTS
(
SELECT * FROM Trained_In
WHERE Treatment = Procedure
AND Physician = U.Physician
)
)
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Christopher Turk |
q = """
SELECT P.Name FROM
Physician AS P,
(SELECT Physician, Procedure FROM Undergoes
EXCEPT
SELECT Physician, Treatment FROM Trained_in) AS Pe
WHERE P.EmployeeID=Pe.Physician
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Christopher Turk |
q = """
SELECT Name
FROM Physician
WHERE EmployeeID IN
(
SELECT Undergoes.Physician
FROM Undergoes
LEFT JOIN Trained_In
ON Undergoes.Physician=Trained_In.Physician
AND Undergoes.Procedure=Trained_In.Treatment
WHERE Treatment IS NULL
)
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Christopher Turk |
q = """
SELECT P.Name AS Physician, Pr.Name AS Procedure, U.Date, Pt.Name AS Patient
FROM Physician P, Undergoes U, Patient Pt, Procedure Pr
WHERE U.Patient = Pt.SSN
AND U.Procedure = Pr.Code
AND U.Physician = P.EmployeeID
AND NOT EXISTS
(
SELECT * FROM Trained_In T
WHERE T.Treatment = U.Procedure
AND T.Physician = U.Physician
)
"""
pd.read_sql(q,con)
physician | procedure | date | patient | |
---|---|---|---|---|
0 | Christopher Turk | Complete Walletectomy | 2008-05-13 | Dennis Doe |
q = """
SELECT P.Name,Pr.Name,U.Date,Pt.Name FROM
Physician AS P,
Procedure AS Pr,
Undergoes AS U,
Patient AS Pt,
(SELECT Physician, Procedure FROM Undergoes
EXCEPT
SELECT Physician, Treatment FROM Trained_in) AS Pe
WHERE P.EmployeeID=Pe.Physician AND Pe.Procedure=Pr.Code AND Pe.Physician=U.Physician AND Pe.Procedure=U.Procedure AND U.Patient=Pt.SSN
"""
pd.read_sql(q,con)
name | name | date | name | |
---|---|---|---|---|
0 | Christopher Turk | Complete Walletectomy | 2008-05-13 | Dennis Doe |
q = """
SELECT Name
FROM Physician
WHERE EmployeeID IN
(
SELECT Physician FROM Undergoes U
WHERE Date >
(
SELECT CertificationExpires
FROM Trained_In T
WHERE T.Physician = U.Physician
AND T.Treatment = U.Procedure
)
)
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Todd Quinlan |
q = """
SELECT P.Name FROM
Physician AS P,
Trained_In T,
Undergoes AS U
WHERE T.Physician=U.Physician AND T.Treatment=U.Procedure AND U.Date>T.CertificationExpires AND P.EmployeeID=U.Physician
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Todd Quinlan |
q = """
SELECT P.Name AS Physician, Pr.Name AS Procedure, U.Date, Pt.Name AS Patient, T.CertificationExpires
FROM Physician P, Undergoes U, Patient Pt, Procedure Pr, Trained_In T
WHERE U.Patient = Pt.SSN
AND U.Procedure = Pr.Code
AND U.Physician = P.EmployeeID
AND Pr.Code = T.Treatment
AND P.EmployeeID = T.Physician
AND U.Date > T.CertificationExpires
"""
pd.read_sql(q,con)
physician | procedure | date | patient | certificationexpires | |
---|---|---|---|---|---|
0 | Todd Quinlan | Obfuscated Dermogastrotomy | 2008-05-09 | Dennis Doe | 2007-12-31 |
pd.read_sql("select * from Appointment limit 2;",con)
appointmentid | patient | prepnurse | physician | start | End | examinationroom | |
---|---|---|---|---|---|---|---|
0 | 13216584 | 100000001 | 101 | 1 | 2008-04-24 | 2008-04-24 | A |
1 | 26548913 | 100000002 | 101 | 2 | 2008-04-24 | 2008-04-24 | B |
pd.read_sql("select start from Appointment limit 2;",con)
start | |
---|---|
0 | 2008-04-24 |
1 | 2008-04-24 |
pd.read_sql("select A.start from Appointment A limit 2;",con)
start | |
---|---|
0 | 2008-04-24 |
1 | 2008-04-24 |
pd.read_sql('select A."End" from Appointment A limit 2;',con)
# LINE 1: select A.End from Appointment A limit 2;
# END is a keyword, we need to escape it using double quotes.
End | |
---|---|
0 | 2008-04-24 |
1 | 2008-04-24 |
q = """
SELECT Pt.Name AS Patient, Ph.Name AS Physician, N.Name AS Nurse,
A.Start, A."End", A.ExaminationRoom,
PhPCP.Name AS PCP
FROM Patient Pt, Physician Ph, Physician PhPCP,
Appointment A LEFT JOIN Nurse N ON A.PrepNurse = N.EmployeeID
WHERE A.Patient = Pt.SSN
AND A.Physician = Ph.EmployeeID
AND Pt.PCP = PhPCP.EmployeeID
AND A.Physician <> Pt.PCP;
"""
pd.read_sql(q,con)
# A.End fails
patient | physician | nurse | start | End | examinationroom | pcp | |
---|---|---|---|---|---|---|---|
0 | Dennis Doe | Percival Cox | Paul Flowers | 2008-04-25 | 2008-04-25 | B | Christopher Turk |
1 | Dennis Doe | Percival Cox | None | 2008-04-26 | 2008-04-26 | C | Christopher Turk |
2 | John Smith | Christopher Turk | None | 2008-04-26 | 2008-04-26 | C | John Dorian |
3 | Dennis Doe | Molly Clock | Laverne Roberts | 2008-04-27 | 2008-04-21 | A | Christopher Turk |
q = """
SELECT Pt.Name AS Patient, Ph.Name AS Physician, N.Name AS Nurse,
A.Start, A.ExaminationRoom,
PhPCP.Name AS PCP
FROM Patient Pt, Physician Ph, Physician PhPCP,
Appointment A LEFT JOIN Nurse N ON A.PrepNurse = N.EmployeeID
WHERE A.Patient = Pt.SSN
AND A.Physician = Ph.EmployeeID
AND Pt.PCP = PhPCP.EmployeeID
AND A.Physician <> Pt.PCP;
"""
pd.read_sql(q,con)
patient | physician | nurse | start | examinationroom | pcp | |
---|---|---|---|---|---|---|
0 | Dennis Doe | Percival Cox | Paul Flowers | 2008-04-25 | B | Christopher Turk |
1 | Dennis Doe | Percival Cox | None | 2008-04-26 | C | Christopher Turk |
2 | John Smith | Christopher Turk | None | 2008-04-26 | C | John Dorian |
3 | Dennis Doe | Molly Clock | Laverne Roberts | 2008-04-27 | A | Christopher Turk |
q = """
SELECT * FROM Undergoes U
WHERE Patient <>
(
SELECT Patient FROM Stay S
WHERE U.Stay = S.StayID
);
"""
pd.read_sql(q,con)
patient | procedure | stay | date | physician | assistingnurse | |
---|---|---|---|---|---|---|
0 | 100000001 | 7 | 3217 | 2008-05-10 | 7 | 101 |
q = """
SELECT N.Name FROM Nurse N
WHERE EmployeeID IN
(
SELECT OC.Nurse FROM On_Call OC, Room R
WHERE OC.BlockFloor = R.BlockFloor
AND OC.BlockCode = R.BlockCode
AND R.Number = 123
);
"""
pd.read_sql(q,con)
name | |
---|---|
0 | Laverne Roberts |
1 | Paul Flowers |
q = """
SELECT ExaminationRoom, COUNT(AppointmentID) AS Number FROM Appointment
GROUP BY ExaminationRoom;
"""
pd.read_sql(q,con)
examinationroom | number | |
---|---|---|
0 | B | 3 |
1 | C | 3 |
2 | A | 3 |
q = """
SELECT Pt.Name, PhPCP.Name FROM Patient Pt, Physician PhPCP
WHERE Pt.PCP = PhPCP.EmployeeID
AND EXISTS
(
SELECT * FROM Prescribes Pr
WHERE Pr.Patient = Pt.SSN
AND Pr.Physician = Pt.PCP
)
AND EXISTS
(
SELECT * FROM Undergoes U, Procedure Pr
WHERE U.Procedure = Pr.Code
AND U.Patient = Pt.SSN
AND Pr.Cost > 5000
)
AND 2 <=
(
SELECT COUNT(A.AppointmentID) FROM Appointment A, Nurse N
WHERE A.PrepNurse = N.EmployeeID
AND N.Registered = true
)
AND NOT Pt.PCP IN
(
SELECT Head FROM Department
);
"""
# NOTE: in sqlite we can compare boolean and integer 1 but in postgres we cant
# sqlite: N.Registered = 1
# postgres: N.Registered = true
pd.read_sql(q,con)
name | name | |
---|---|---|
0 | John Smith | John Dorian |
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
'{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 0 min 2 secs