
import time
import os
import json
time_start_notebook = time.time()
import numpy as np
import pandas as pd
# Jupyter notebook settings for pandas
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', 100)
from datetime import datetime
from dateutil.relativedelta import relativedelta
# last_year = datetime.now() - relativedelta(years=1)
from sqlalchemy import create_engine
# configs
data = json.load(open(os.path.expanduser('~/.config/postgres/credentials.json')))
pw,port = data['password'], data['port'] # port is 5432
dbname = 'wbhospital' # open pdadmin4 and look if this database exists.
%load_ext sql
%sql postgres://postgres:$pw@localhost:$port/$dbname
%%sql
select table_schema,table_name
from information_schema.tables
where table_schema not in ('pg_catalog', 'information_schema')
and table_schema not like 'pg_toast%'
order by table_schema, table_name
* postgres://postgres:***@localhost:5432/wbhospital 15 rows affected.
| table_schema | table_name | 
|---|---|
| public | affiliated_with | 
| public | appointment | 
| public | block | 
| public | department | 
| public | medication | 
| public | nurse | 
| public | on_call | 
| public | patient | 
| public | physician | 
| public | prescribes | 
| public | procedure | 
| public | room | 
| public | stay | 
| public | trained_in | 
| public | undergoes | 
tables = _
print(type(tables))
df_tables = pd.DataFrame(tables, columns=['table_schema','table_name'])
<class 'sql.run.ResultSet'>
df_tables.columns
Index(['table_schema', 'table_name'], dtype='object')
x = %sql select column_name, data_type, character_maximum_length from INFORMATION_SCHEMA.COLUMNS where table_name = 'appointment';
dfx = x.DataFrame()
dfx
* postgres://postgres:***@localhost:5432/wbhospital 7 rows affected.
| column_name | data_type | character_maximum_length | |
|---|---|---|---|
| 0 | appointmentid | integer | None | 
| 1 | patient | integer | None | 
| 2 | prepnurse | integer | None | 
| 3 | physician | integer | None | 
| 4 | start | date | None | 
| 5 | End | date | None | 
| 6 | examinationroom | text | None | 
'**' + dfx['column_name'] + '**'
# I don't see any whitespaces around the column names of this table.
0 **appointmentid** 1 **patient** 2 **prepnurse** 3 **physician** 4 **start** 5 **End** 6 **examinationroom** Name: column_name, dtype: object
engine = create_engine(f'postgresql://postgres:{pw}@localhost:{port}/{dbname}')
con = engine.connect()
for t in df_tables['table_name']:
    print(t)
    display(pd.read_sql(f'select * from {t} limit 1',con))
affiliated_with
| physician | department | primaryaffiliation | |
|---|---|---|---|
| 0 | 1 | 1 | True | 
appointment
| appointmentid | patient | prepnurse | physician | start | End | examinationroom | |
|---|---|---|---|---|---|---|---|
| 0 | 13216584 | 100000001 | 101 | 1 | 2008-04-24 | 2008-04-24 | A | 
block
| floor | code | |
|---|---|---|
| 0 | 1 | 1 | 
department
| departmentid | name | head | |
|---|---|---|---|
| 0 | 1 | General Medicine | 4 | 
medication
| code | name | brand | description | |
|---|---|---|---|---|
| 0 | 1 | Procrastin-X | X | N/A | 
nurse
| employeeid | name | position | registered | ssn | |
|---|---|---|---|---|---|
| 0 | 101 | Carla Espinosa | Head Nurse | True | 111111110 | 
on_call
| nurse | blockfloor | blockcode | start | End | |
|---|---|---|---|---|---|
| 0 | 101 | 1 | 1 | 2008-11-04 | 2008-11-04 | 
patient
| ssn | name | address | phone | insuranceid | pcp | |
|---|---|---|---|---|---|---|
| 0 | 100000001 | John Smith | 42 Foobar Lane | 555-0256 | 68476213 | 1 | 
physician
| employeeid | name | position | ssn | |
|---|---|---|---|---|
| 0 | 1 | John Dorian | Staff Internist | 111111111 | 
prescribes
| physician | patient | medication | date | appointment | dose | |
|---|---|---|---|---|---|---|
| 0 | 1 | 100000001 | 1 | 2008-04-24 | 13216584 | 5 | 
procedure
| code | name | cost | |
|---|---|---|---|
| 0 | 1 | Reverse Rhinopodoplasty | 1500.0 | 
room
| number | type | blockfloor | blockcode | unavailable | |
|---|---|---|---|---|---|
| 0 | 101 | Single | 1 | 1 | False | 
stay
| stayid | patient | room | start | End | |
|---|---|---|---|---|---|
| 0 | 3215 | 100000001 | 111 | 2008-05-01 | 2008-05-04 | 
trained_in
| physician | treatment | certificationdate | certificationexpires | |
|---|---|---|---|---|
| 0 | 3 | 1 | 2008-01-01 | 2008-12-31 | 
undergoes
| patient | procedure | stay | date | physician | assistingnurse | |
|---|---|---|---|---|---|---|
| 0 | 100000001 | 6 | 3215 | 2008-05-02 | 3 | 101 | 
q = """
SELECT Name 
  FROM Physician 
 WHERE EmployeeID IN 
       (
         SELECT Physician FROM Undergoes U WHERE NOT EXISTS 
            (
               SELECT * FROM Trained_In 
                WHERE Treatment = Procedure 
                  AND Physician = U.Physician
            )
       )
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Christopher Turk | 
q = """
SELECT P.Name FROM 
 Physician AS P,    
 (SELECT Physician, Procedure FROM Undergoes 
    EXCEPT 
    SELECT Physician, Treatment FROM Trained_in) AS Pe
 WHERE P.EmployeeID=Pe.Physician
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Christopher Turk | 
q = """
SELECT Name 
  FROM Physician
 WHERE EmployeeID IN
   (
      SELECT Undergoes.Physician
        FROM Undergoes 
             LEFT JOIN Trained_In
             ON Undergoes.Physician=Trained_In.Physician
                 AND Undergoes.Procedure=Trained_In.Treatment
       WHERE Treatment IS NULL
   )
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Christopher Turk | 
q = """
SELECT P.Name AS Physician, Pr.Name AS Procedure, U.Date, Pt.Name AS Patient 
  FROM Physician P, Undergoes U, Patient Pt, Procedure Pr
  WHERE U.Patient = Pt.SSN
    AND U.Procedure = Pr.Code
    AND U.Physician = P.EmployeeID
    AND NOT EXISTS 
              (
                SELECT * FROM Trained_In T
                WHERE T.Treatment = U.Procedure 
                AND T.Physician = U.Physician
              )
"""
pd.read_sql(q,con)
| physician | procedure | date | patient | |
|---|---|---|---|---|
| 0 | Christopher Turk | Complete Walletectomy | 2008-05-13 | Dennis Doe | 
q = """
SELECT P.Name,Pr.Name,U.Date,Pt.Name FROM 
 Physician AS P,    
 Procedure AS Pr,
 Undergoes AS U,
 Patient AS Pt,
 (SELECT Physician, Procedure FROM Undergoes 
    EXCEPT 
    SELECT Physician, Treatment FROM Trained_in) AS Pe  
 WHERE P.EmployeeID=Pe.Physician AND Pe.Procedure=Pr.Code AND Pe.Physician=U.Physician AND Pe.Procedure=U.Procedure AND U.Patient=Pt.SSN
"""
pd.read_sql(q,con)
| name | name | date | name | |
|---|---|---|---|---|
| 0 | Christopher Turk | Complete Walletectomy | 2008-05-13 | Dennis Doe | 
q = """
SELECT Name 
  FROM Physician 
 WHERE EmployeeID IN 
       (
         SELECT Physician FROM Undergoes U 
          WHERE Date > 
               (
                  SELECT CertificationExpires 
                    FROM Trained_In T 
                   WHERE T.Physician = U.Physician 
                     AND T.Treatment = U.Procedure
               )
       )
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Todd Quinlan | 
q = """
SELECT P.Name FROM 
 Physician AS P,    
 Trained_In T,
 Undergoes AS U
 WHERE T.Physician=U.Physician AND T.Treatment=U.Procedure AND U.Date>T.CertificationExpires AND P.EmployeeID=U.Physician
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Todd Quinlan | 
q = """
SELECT P.Name AS Physician, Pr.Name AS Procedure, U.Date, Pt.Name AS Patient, T.CertificationExpires
  FROM Physician P, Undergoes U, Patient Pt, Procedure Pr, Trained_In T
  WHERE U.Patient = Pt.SSN
    AND U.Procedure = Pr.Code
    AND U.Physician = P.EmployeeID
    AND Pr.Code = T.Treatment
    AND P.EmployeeID = T.Physician
    AND U.Date > T.CertificationExpires
"""
pd.read_sql(q,con)
| physician | procedure | date | patient | certificationexpires | |
|---|---|---|---|---|---|
| 0 | Todd Quinlan | Obfuscated Dermogastrotomy | 2008-05-09 | Dennis Doe | 2007-12-31 | 
pd.read_sql("select * from Appointment limit 2;",con)
| appointmentid | patient | prepnurse | physician | start | End | examinationroom | |
|---|---|---|---|---|---|---|---|
| 0 | 13216584 | 100000001 | 101 | 1 | 2008-04-24 | 2008-04-24 | A | 
| 1 | 26548913 | 100000002 | 101 | 2 | 2008-04-24 | 2008-04-24 | B | 
pd.read_sql("select start from Appointment limit 2;",con)
| start | |
|---|---|
| 0 | 2008-04-24 | 
| 1 | 2008-04-24 | 
pd.read_sql("select A.start from Appointment A limit 2;",con)
| start | |
|---|---|
| 0 | 2008-04-24 | 
| 1 | 2008-04-24 | 
pd.read_sql('select A."End" from Appointment A limit 2;',con)
# LINE 1: select A.End from Appointment A limit 2;
# END is a keyword, we need to escape it using double quotes.
| End | |
|---|---|
| 0 | 2008-04-24 | 
| 1 | 2008-04-24 | 
q = """
SELECT Pt.Name AS Patient, Ph.Name AS Physician, N.Name AS Nurse,
       A.Start, A."End", A.ExaminationRoom,
       PhPCP.Name AS PCP
FROM Patient Pt, Physician Ph, Physician PhPCP,
     Appointment A LEFT JOIN Nurse N ON A.PrepNurse = N.EmployeeID
WHERE A.Patient = Pt.SSN
   AND A.Physician = Ph.EmployeeID
   AND Pt.PCP = PhPCP.EmployeeID
   AND A.Physician <> Pt.PCP;
"""
pd.read_sql(q,con)
# A.End fails
| patient | physician | nurse | start | End | examinationroom | pcp | |
|---|---|---|---|---|---|---|---|
| 0 | Dennis Doe | Percival Cox | Paul Flowers | 2008-04-25 | 2008-04-25 | B | Christopher Turk | 
| 1 | Dennis Doe | Percival Cox | None | 2008-04-26 | 2008-04-26 | C | Christopher Turk | 
| 2 | John Smith | Christopher Turk | None | 2008-04-26 | 2008-04-26 | C | John Dorian | 
| 3 | Dennis Doe | Molly Clock | Laverne Roberts | 2008-04-27 | 2008-04-21 | A | Christopher Turk | 
q = """
SELECT Pt.Name AS Patient, Ph.Name AS Physician, N.Name AS Nurse,
       A.Start, A.ExaminationRoom,
       PhPCP.Name AS PCP
FROM Patient Pt, Physician Ph, Physician PhPCP,
     Appointment A LEFT JOIN Nurse N ON A.PrepNurse = N.EmployeeID
WHERE A.Patient = Pt.SSN
   AND A.Physician = Ph.EmployeeID
   AND Pt.PCP = PhPCP.EmployeeID
   AND A.Physician <> Pt.PCP;
"""
pd.read_sql(q,con)
| patient | physician | nurse | start | examinationroom | pcp | |
|---|---|---|---|---|---|---|
| 0 | Dennis Doe | Percival Cox | Paul Flowers | 2008-04-25 | B | Christopher Turk | 
| 1 | Dennis Doe | Percival Cox | None | 2008-04-26 | C | Christopher Turk | 
| 2 | John Smith | Christopher Turk | None | 2008-04-26 | C | John Dorian | 
| 3 | Dennis Doe | Molly Clock | Laverne Roberts | 2008-04-27 | A | Christopher Turk | 
q = """
SELECT * FROM Undergoes U
 WHERE Patient <> 
   (
     SELECT Patient FROM Stay S
      WHERE U.Stay = S.StayID
   );
"""
pd.read_sql(q,con)
| patient | procedure | stay | date | physician | assistingnurse | |
|---|---|---|---|---|---|---|
| 0 | 100000001 | 7 | 3217 | 2008-05-10 | 7 | 101 | 
q = """
SELECT N.Name FROM Nurse N
 WHERE EmployeeID IN
   (
     SELECT OC.Nurse FROM On_Call OC, Room R
      WHERE OC.BlockFloor = R.BlockFloor
        AND OC.BlockCode = R.BlockCode
        AND R.Number = 123
   );
"""
pd.read_sql(q,con)
| name | |
|---|---|
| 0 | Laverne Roberts | 
| 1 | Paul Flowers | 
q = """
SELECT ExaminationRoom, COUNT(AppointmentID) AS Number FROM Appointment
GROUP BY ExaminationRoom;
"""
pd.read_sql(q,con)
| examinationroom | number | |
|---|---|---|
| 0 | B | 3 | 
| 1 | C | 3 | 
| 2 | A | 3 | 
q = """
SELECT Pt.Name, PhPCP.Name FROM Patient Pt, Physician PhPCP
 WHERE Pt.PCP = PhPCP.EmployeeID
   AND EXISTS
       (
         SELECT * FROM Prescribes Pr
          WHERE Pr.Patient = Pt.SSN
            AND Pr.Physician = Pt.PCP
       )
   AND EXISTS
       (
         SELECT * FROM Undergoes U, Procedure Pr
          WHERE U.Procedure = Pr.Code
            AND U.Patient = Pt.SSN
            AND Pr.Cost > 5000
       )
   AND 2 <=
       (
         SELECT COUNT(A.AppointmentID) FROM Appointment A, Nurse N
          WHERE A.PrepNurse = N.EmployeeID
            AND N.Registered = true
       )
   AND NOT Pt.PCP IN
       (
          SELECT Head FROM Department
       );
"""
# NOTE: in sqlite we can compare boolean and integer 1 but in postgres we cant
# sqlite: N.Registered = 1
# postgres: N.Registered = true
pd.read_sql(q,con)
| name | name | |
|---|---|---|
| 0 | John Smith | John Dorian | 
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 0 min 2 secs