Create schema allinspections_updated;
use allinspections_updated;
CREATE TABLE `business` (
`Business_id` varchar(40) NOT NULL,
`Name` varchar(100) DEFAULT NULL,
`Address` varchar(40) DEFAULT NULL,
`City` varchar(15) DEFAULT NULL,
`State` varchar(4) DEFAULT NULL,
`Postal_Code` varchar(10) DEFAULT NULL,
`Latitude` varchar(10) DEFAULT NULL,
`Longitude` varchar(10) DEFAULT NULL,
`Phone_Number` varchar(15) DEFAULT NULL,
PRIMARY KEY (`Business_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
LOAD DATA LOCAL INFILE 'C:\\businesses.csv'
INTO TABLE business
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY'\n'
IGNORE 1 LINES;
select count(*)
from business;
select *
from business;
CREATE TABLE`inspections` (
`business_id` varchar(36) DEFAULT NULL,
`score` int(11) DEFAULT NULL,
`date` date DEFAULT NULL,
KEY `inspections_FK_idx` (`business_id`),
CONSTRAINT `inspections_FK` FOREIGN KEY (`business_id`) REFERENCES `business` (`Business_id`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#### note that for this load statement, we used the "set date" function to change the date to the format that MySQL prefers: year, month, day,
##### which will allow us to join it to the date field in "violations" table once we begin linking the tables
LOAD DATA LOCAL INFILE 'C:\\inspections.csv'
INTO TABLE inspections
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY'\n'
IGNORE 1 LINES
(business_id, score, @date)
set date = str_to_date(@date, '%Y%m%d');
update inspections set date = str_to_date(left(date, 8), '%Y%m%d');
select count(*)
from inspections;
select *
from inspections
order by date desc;
CREATE TABLE`violations` (
`Business_id` varchar(36) NOT NULL,
`Date` date DEFAULT NULL,
`Code` varchar(5) DEFAULT NULL,
`Description` varchar(200) DEFAULT NULL,
KEY `violations_fk_idx` (`Business_id`),
CONSTRAINT `violations_fk` FOREIGN KEY (`Business_id`) REFERENCES `business` (`Business_id`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
LOAD DATA LOCAL INFILE 'C:\\violations.csv'
INTO TABLE violations
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY'\n'
IGNORE 1 LINES;
select count(*)
from violations;
select *
from violations;
#### the legend contains the description for the scores, which comes in handy when using the "where" statement to filter our queries
CREATE TABLE`legend` (
`Minimum` int(11) DEFAULT NULL,
`Maximum` int(11) DEFAULT NULL,
`Legend` varchar(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
select *
from legend;
#### this query does two things, creates a master table from the tables that we have joined using "where" statement, which is explained
#### in The Data Journalists' chapter, beginning on page 100
CREATE VIEW All_Inspection_Master AS
SELECT business.Business_id, business.Name, business.Address,business.Phone_Number,violations.Date,
violations.Code, violations.Description, inspections.score
FROM business, inspections, violations
where business.Business_id = inspections.business_id
AND business.Business_id = violations.business_id
AND inspections.date = violations.date;
select count(*)
from all_inspection_master;
select *
from all_inspection_master
order by Date desc;
#### once we have created our master table, it's a good idea to run queries that group the key categories in order to determine the
#### the most problematic ones. Think of these queries as performing the same task as a pivot table in Excel
select Name, count(Business_id) Name
from all_inspection_master
group by Name
order by count(Name) desc;
select code, count(Business_id) 'code violations'
from all_inspection_master
group by Code
order by count(Business_id) desc;
select Description, count(Business_id) 'violation descriptions'
from all_inspection_master
group by Description
order by count(Business_id) desc;
#### this query acts like a pivot table in that is pulls the year out of the date, groups the records by year, and then counts them
select Name, count(Name) name, year(Date) Year
from all_inspection_master
group by Name, Year
order by count(Name) desc;
select Name, count(Name) name, year(Date) Year
from all_inspection_master
where score='0'
group by Name, Year
order by count(Name) desc;
#### now we can begin running queries to drill down. Be sure to export the results you like as csv files to continue working with them
select *
from all_inspection_master
where name like "Red Oak%" and date >'2016-01-01' and score ='0'
order by date desc;
select *
from all_inspection_master
where date >'2016-01-01'
order by date desc;
select Name, count(name) Name
from all_inspection_master
group by name;
SELECT *
FROM all_inspection_master
WHERE Name like '%Tim%';
SELECT *
FROM all_inspection_master
WHERE Name like '%Tim%' and score ='0';
select count(Business_id) 'Number of violations', Name, Address
from all_inspection_master
where score = '0'
group by Name, Address
order by count(Business_id) desc;
select *
from all_inspection_master
where score = '0' and Name = 'DAISY NAIL & SPA - ST LAURENT';
select *
from all_inspection_master
where Name like '%retirement%';
select *
from all_inspection_master
where Name like '%retirement%' and score = '0';