Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions csvw/ashe-example/ashe-example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"@context": "http://www.w3.org/ns/csvw",
"url": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv",
"dct:title": "Annual summary of hours worked, place of work by Local Authority",
"dct:description": "Estimates of paid hours worked for UK employees by gender and full/part-time working by work based Region to Local Authority level.",
"dct:issued": "2017-10-26T09:30:00.000Z",
"dct:publisher": {
"name": "Office for National Statistics (ONS)",
"@type": "Non-ministerial department",
"@id": "https://www.ons.gov.uk"
},
"dcat:contactPoint": {
"email": "earnings@ons.gsi.gov.uk",
"name": "Roger Smith",
"telephone": "+44 (0)1633 45 6120"
},
"tableSchema": {
"columns": [
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=0",
"datatype": "number",
"name": "Hours per week",
"required": true,
"titles": "V4_2"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=1",
"titles": "Data marking"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=2",
"titles": "Coefficient of variation"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=3",
"name": "Time_codelist",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/calendar-years/codes/{Time_codelist}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=4",
"description": "",
"name": "time",
"titles": "Time"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=5",
"name": "ashe-geography",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/ashe-geography/codes/{ashe-geography}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=6",
"description": "",
"name": "geography",
"titles": "Geographic areas"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=7",
"name": "Hours_codelist",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/ashe-hours/codes/{Hours_codelist}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=8",
"description": "",
"name": "hours",
"titles": "Hours worked"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=9",
"name": "Sex_codelist",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/ashe-sex/codes/{Sex_codelist}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=10",
"description": "",
"name": "sex",
"titles": "Sex"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=11",
"name": "WorkingPattern_codelist",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/ashe-working-pattern/codes/{WorkingPattern_codelist}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=12",
"description": "",
"name": "workingpattern",
"titles": "Working pattern"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=13",
"name": "Statistics_codelist",
"required": true,
"valueURL": "https://api.beta.ons.gov.uk/v1/code-lists/ashe-statistics/codes/{Statistics_codelist}"
},
{
"@id": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=14",
"description": "Estimates are provided for mean, median and selected percentiles in the distribution. Percentiles mark the values below which certain proportions of jobs fall. For example, the 20th percentile is the value below which 20% of jobs fall.",
"name": "statistics",
"titles": "Statistics"
}
],
"aboutUrl": "https://api.beta.ons.gov.uk/v1/datasets/ashe-table-7-hours/editions/time-series/versions/1/metadata"
},
"dcat:theme": "/employmentandlabourmarket/peopleinwork/earningsandworkinghours",
"dct:license": "Open Government Licence v3.0",
"dct:accrualPeriodicity": "Annual",
"notes": [
{
"type": "Coefficient of variation (CV)",
"target": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=need-to-store",
"body": "The CV column shows the quality of each estimate based on the coefficient of variation (CV) of that estimate. The CV is the ratio of the standard error of an estimate to the estimate itself and is expressed as a percentage. The smaller the coefficient of variation the greater the accuracy of the estimate. The true value is likely to lie within +/- twice the CV. For example, for an estimate of £200 with a CV of 5%, we would expect the true population average to be within the range £180 to £220.",
"motivation": ""
},
{
"type": "Data markings",
"target": "https://download.beta.ons.gov.uk/downloads/datasets/ashe-table-7-hours/editions/time-series/versions/1.csv#col=need-to-store",
"body": "Estimates with a CV greater than 20% are suppressed from publication on quality grounds, along with those for which there is a risk of disclosure of individual employees or employers. This is shown as an 'x' in the data marking column.",
"motivation": ""
}
]
}
257 changes: 257 additions & 0 deletions csvw/ashe-example/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
package main

import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"strconv"
"strings"

"github.com/ONSdigital/dp-dataset-api/models"
"github.com/fatih/structs"
)

// Unknown holds extra string key/value pairs that are not fields of CSVW;
// marshal merges them into the top level of the output document.
type Unknown map[string]string

// CSVW is the top-level metadata document emitted by this program. Each field
// is serialised under the JSON key given in its tag (CSVW, Dublin Core "dct:"
// and DCAT "dcat:" property names).
type CSVW struct {
// Context is written as "@context"; assignTopLevel sets it to the CSVW namespace URL.
Context string `json:"@context"`
// URL is the location of the CSV file being described.
URL string `json:"url"`
Title string `json:"dct:title"`
Description string `json:"dct:description"`
Issued string `json:"dct:issued"`
// Creator is serialised under "dct:publisher".
Creator Creator `json:"dct:publisher"`
Contact models.ContactDetails `json:"dcat:contactPoint"`
// TableSchema carries the per-column descriptions and the aboutUrl.
TableSchema Columns `json:"tableSchema"`
Theme string `json:"dcat:theme"`
License string `json:"dct:license"`
Frequency string `json:"dct:accrualPeriodicity"`
// Notes are annotations built from the dataset's alerts and usage notes.
Notes []Note `json:"notes"`
}

// Creator describes the publisher of the dataset; it is emitted under the
// "dct:publisher" key of CSVW.
type Creator struct {
Name string `json:"name"`
Type string `json:"@type"`
ID string `json:"@id"` // a URL where more information about the publisher is available
}

// Columns is the "tableSchema" object: the list of column descriptions plus
// the aboutUrl, which main sets to the metadata endpoint URL.
type Columns struct {
C []Column `json:"columns"`
About string `json:"aboutUrl"`
}

// Column is a single column description in the table schema. It is a free-form
// map because different kinds of column (observation, data marking, code,
// label) carry different sets of properties.
type Column map[string]interface{}

// Note is one entry of the "notes" array: an annotation attached to the CSV
// file or to part of it.
type Note struct {
Type string `json:"type"` // is this an enum?
// Target is the URL (optionally with a fragment) that the note applies to.
Target string `json:"target"`
Body string `json:"body"`
Motivation string `json:"motivation"` // how is this different from type? do we need this? is this an enum?
}

// main fetches the metadata for one dataset version from the ONS API, converts
// it into a CSVW metadata document and prints that document as JSON on stdout.
func main() {
	url := "https://api.beta.ons.gov.uk/v1/datasets/ashe-table-7-hours/editions/time-series/versions/1/metadata"

	metadata := getMetadata(url)
	if metadata == nil {
		// A nil result means the metadata could not be retrieved; stop with a
		// clear message instead of dereferencing nil further down.
		log.Fatal("getMetadata: no metadata returned for ", url)
	}

	csv := assignTopLevel(metadata)

	csv.TableSchema.About = url
	csv.TableSchema.C = populateColumns(metadata.Dimensions, metadata.UnitOfMeasure, metadata.Downloads.CSV.HRef)

	// Alerts and UsageNotes are optional pointers; treat nil as "none".
	var alerts []models.Alert
	if metadata.Alerts != nil {
		alerts = *metadata.Alerts
	}

	var usage []models.UsageNote
	if metadata.UsageNotes != nil {
		usage = *metadata.UsageNotes
	}

	csv.Notes = addNotes(metadata.Downloads.CSV.HRef, alerts, usage)

	// Not yet wired in: merging dynamically keyed values into the output.
	// for k, v := range values {
	// 	unk[dynamicKeys[k]] = v
	// }
	//
	// b := marshal(csv, unk)

	b, err := json.Marshal(csv)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(b))
}

// addNotes builds the CSVW "notes" annotations from a dataset's alerts and
// usage notes.
//
// Each alert becomes a note targeting url itself; each usage note becomes a
// note targeting a column fragment of url. The fragment is currently the
// placeholder "#col=need-to-store" for every usage note.
//
// The result is never nil, so it is encoded as a JSON array ("[]") even when
// there are no alerts and no usage notes, rather than as null.
func addNotes(url string, alerts []models.Alert, notes []models.UsageNote) []Note {
	list := make([]Note, 0, len(alerts)+len(notes))

	for _, a := range alerts {
		list = append(list, Note{
			Type:   a.Type,
			Body:   a.Description,
			Target: url,
		})
	}

	for _, u := range notes {
		list = append(list, Note{
			Type:   u.Title,
			Body:   u.Note,
			Target: url + "#col=need-to-store",
		})
	}

	return list
}

// populateColumns builds the CSVW column descriptions for a V4-format CSV.
//
// The header row is laid out as:
//
//	V4_<n>, <n extra observation-level columns>, then (code, label) pairs per dimension
//
// dims supplies the label, description and href for each dimension, matched by
// comparing the lower-cased label header with the dimension name. unit is used
// as the name of the observation column. csvURL is the base for every column
// "@id" ("<csvURL>#col=<index>").
//
// It panics with "not valid v4 header" when the header does not follow the
// layout above.
func populateColumns(dims []models.CodeList, unit, csvURL string) []Column {
	// NOTE(review): the header row is hard-coded for the ASHE example;
	// presumably it should eventually be read from the CSV itself.
	headerRow := "V4_2,Data marking,Coefficient of variation,Time_codelist,Time,ashe-geography,Geography,Hours_codelist,Hours,Sex_codelist,Sex,WorkingPattern_codelist,WorkingPattern,Statistics_codelist,Statistics"
	header := strings.Split(headerRow, ",")

	parts := strings.Split(header[0], "_")
	if len(parts) != 2 {
		fmt.Println(parts[0] + " --- " + header[0])
		panic("not valid v4 header")
	}

	offset, err := strconv.Atoi(parts[1])
	if err != nil {
		panic("not valid v4 header")
	}

	// The offset counts the columns between the observation and the first
	// dimension, so it cannot be negative or run past the end of the header.
	if offset < 0 || offset >= len(header) {
		panic("not valid v4 header")
	}

	// Dimensions must come in complete (code, label) pairs.
	firstDim := offset + 1
	dimHeaders := header[firstDim:]
	if len(dimHeaders)%2 != 0 {
		panic("not valid v4 header")
	}

	list := make([]Column, 0, len(header))

	// observations
	list = append(list, Column{
		"titles":   header[0],
		"name":     unit,
		"datatype": "number",
		"required": true,
		"@id":      csvURL + "#col=0",
	})

	// data markings and any other observation-level columns
	for i := 1; i <= offset; i++ {
		list = append(list, Column{
			"titles": header[i],
			"@id":    csvURL + "#col=" + strconv.Itoa(i),
		})
	}

	// dimensions
	for i := 0; i < len(dimHeaders); i += 2 {
		codeHeader := dimHeaders[i]
		dimHeader := strings.ToLower(dimHeaders[i+1])

		// When no dimension matches, dim stays at its zero value and the
		// columns are emitted with empty label, description and href.
		var dim models.CodeList
		for _, d := range dims {
			if d.Name == dimHeader {
				dim = d
				break
			}
		}

		codeCol := Column{
			"name":     codeHeader,
			"@id":      csvURL + "#col=" + strconv.Itoa(firstDim+i),
			"valueURL": dim.HRef + "/codes/{" + codeHeader + "}", //how do we link to the code list or API?
			//"datatype": "number",
			"required": true,
		}

		labelCol := Column{
			"titles":      dim.Label,
			"name":        dimHeader,
			"description": dim.Description,
			"@id":         csvURL + "#col=" + strconv.Itoa(firstDim+i+1),
			//"datatype": "number",
			//"required": true,
		}

		list = append(list, codeCol, labelCol)
	}

	return list
}

// assignTopLevel copies the dataset-level properties of m (title, description,
// release date, theme, licence, frequency, publisher, CSV download URL and
// first contact) into a new CSVW document. TableSchema and Notes are left
// empty for the caller to fill in.
//
// m must be non-nil. When m has no contacts the Contact field is left at its
// zero value instead of panicking on an out-of-range index.
func assignTopLevel(m *models.Metadata) *CSVW {
	csvw := &CSVW{
		Context:     "http://www.w3.org/ns/csvw",
		URL:         m.Downloads.CSV.HRef,
		Title:       m.Title,
		Description: m.Description,
		Issued:      m.ReleaseDate,
		Theme:       m.Theme,
		License:     m.License,
		Frequency:   m.ReleaseFrequency,
		Creator: Creator{
			Name: m.Publisher.Name,
			Type: m.Publisher.Type,
			ID:   m.Publisher.HRef,
		},
	}

	// Only the first contact is carried over to dcat:contactPoint.
	if len(m.Contacts) > 0 {
		csvw.Contact = m.Contacts[0]
	}

	return csvw
}

// getMetadata performs a GET request against url and decodes the JSON response
// body into a models.Metadata.
//
// Failing to build or send the request, failing to read the body, or receiving
// a non-200 status terminates the program via log.Fatal. A JSON decoding error
// is only logged: whatever fields were decoded are still returned, so the
// result may be partially populated.
func getMetadata(url string) *models.Metadata {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Fatal("NewRequest: ", err)
	}

	client := &http.Client{}

	resp, err := client.Do(req)
	if err != nil {
		log.Fatal("Do: ", err)
	}

	// Read and close the body before any further checks so it is released on
	// every path, including the failure paths below. The Close error is
	// ignored: the body was only read from, so there is nothing to flush.
	b, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		log.Fatal("ReadAll: ", err)
	}

	if resp.StatusCode != http.StatusOK {
		log.Fatal("Do: unexpected response status ", resp.Status)
	}

	var md models.Metadata
	if err := json.Unmarshal(b, &md); err != nil {
		// Best effort: keep going with whatever could be decoded.
		log.Println(err)
	}

	return &md
}

// marshal converts st to a map with structs.Map, adds every entry of ourMap to
// that map and returns the JSON encoding of the result.
//
// It panics with "key collision" if a key of ourMap is already present in the
// struct's map, and panics with the encoder's error if encoding fails.
//
// NOTE(review): the map keys come from structs.Map, not from the json struct
// tags — confirm the resulting key names are the ones wanted in the output.
func marshal(st *CSVW, ourMap Unknown) []byte {
	merged := structs.Map(st)

	for key, value := range ourMap {
		_, exists := merged[key]
		if exists {
			panic("key collision")
		}
		merged[key] = value
	}

	encoded, err := json.Marshal(merged)
	if err != nil {
		panic(err)
	}
	return encoded
}