-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata cleaning.sql
More file actions
107 lines (79 loc) · 2.7 KB
/
data cleaning.sql
File metadata and controls
107 lines (79 loc) · 2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
-----------------------------------------clean data in sql quries-----------------------------------------------
select
SaleDateConverted
from Nashville
--standardize date formant
alter table Nashville
add saleDateConverted date
update Nashville
set saleDateConverted = convert (date,SaleDate)
--populate property address data
select *
from Nashville
where PropertyAddress is null
order by ParcelID
select a.ParcelID,a.PropertyAddress,b.ParcelID,b.PropertyAddress,
isnull (a.propertyAddress, b.PropertyAddress)
from Nashville a
join Nashville b
on a.ParcelID = b.ParcelID
and a.[UniqueID ] <> b.[UniqueID ]
where a.PropertyAddress is null
update a
set propertyAddress = isnull (a.propertyAddress, b.PropertyAddress)
from Nashville a
join Nashville b
on a.ParcelID = b.ParcelID
and a.[UniqueID ] <> b.[UniqueID ]
where a.PropertyAddress is null
--breaking out address into individual columns (address,city,state)
select
substring (PropertyAddress,0 , CHARINDEX (',', propertyAddress)) as address
,SUBSTRING(propertyAddress, CHARINDEX (',', propertyAddress)+1,len(PropertyAddress)) as Address
from Nashville
alter table nashville
add PropertySplitAddress nvarchar (255),
PropertySplitCity nvarchar (255)
update
Nashville
set PropertySplitAddress = substring (PropertyAddress,0 , CHARINDEX (',', propertyAddress)),
PropertySplitCity = SUBSTRING(propertyAddress, CHARINDEX (',', propertyAddress)+1,len(PropertyAddress))
--split onwer address(using Parsename)
SELECT
PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 3),
PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 2),
PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 1)
from nashville
ALTER TABLE nashville
ADD OwnerSplitAddress nvarchar (255),
OwnerSplitCity nvarchar (255),
OwnerSplitState nvarchar (255)
update Nashville
set OwnerSplitAddress =PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 3),
OwnerSplitCity = PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 2),
OwnerSplitState =PARSENAME (REPLACE(OwnerAddress , ',' , '.') , 1)
--change Y and N to YES and NO in SoldAsVacant column
select
SoldAsVacant,
case
when SoldAsVacant = 'y' then 'yes'
when SoldAsVacant = 'N' then 'NO'
else SoldAsVacant
end
from Nashville
update Nashville
set SoldAsVacant =case
when SoldAsVacant = 'y' then 'yes'
when SoldAsVacant = 'N' then 'NO'
else SoldAsVacant
end
---remove Duplicates
select
ParcelID,LandUse,PropertyAddress,SaleDate,SalePrice,LegalReference, count(*) as Duplicate_count
from Nashville
group by ParcelID,LandUse,PropertyAddress,SaleDate,SalePrice,LegalReference
having count(*) >1
alter table nashville
drop column UniqueID
select *
from Nashville