diff --git a/AWS/1)_AWS_CUR_Pre-Processor.trs b/AWS/1)_AWS_CUR_Pre-Processor.trs
new file mode 100644
index 0000000..ed0fd9e
--- /dev/null
+++ b/AWS/1)_AWS_CUR_Pre-Processor.trs
@@ -0,0 +1,196 @@
+option loglevel = INFO
+#####################################################################
+#
+# Template Transformer for Pre-Processing AWS CUR data
+# -----------------------------------------------------
+#
+# The following Transformers are required for loading AWS data:
+# 1. AWS Pre-Processor Transformer
+# 2. AWS Consolidation Transformer
+#
+# This Transformer Template pre-processes a single CUR data feed.
+# In order to handle multiple payer accounts, additional changes
+# might be required. Reach out to support@exivity.com for more details.
+#
+#####################################################################
+#
+# AWS CUR data for any given month may contain fees with a UsageStartDate that is in
+# the next month. However, these fees are reflected in the AWS invoice, so we need to
+# include them in the Exivity report for this month.
+#
+# To do this, if we're processing the first of the month then we import ALL records
+# with an ItemType of "Fee" regardless of their UsageStartDate, so that they get charged
+# on the 1st of the month.
+#
+# The conditional logic below therefore applies a different import filter depending on
+# whether the dataDate is the first of the month or not.
+#
+if ("${dataDay}" == "01") {
+  import "system/extracted/aws/${dataYear}${dataMonth}/.*[0-9]*\.csv" source aws alias cur options {
+    pattern on
+    filter = ([lineItem\/UsageStartDate] =~ /${dataYear}-${dataMonth}-${dataDay}.*/ || ["lineItem\/LineItemType"] == "Fee")
+  }
+} else {
+  import "system/extracted/aws/${dataYear}${dataMonth}/.*[0-9]*\.csv" source aws alias cur options {
+    pattern on
+    filter = ([lineItem\/UsageStartDate] =~ /${dataYear}-${dataMonth}-${dataDay}.*/)
+  }
+}
+
+# Simplify the column names we want to work with
+rename column ${/.*bill.PayerAccountId/} to PayerAccountId
+rename column ${/.*bill.BillingEntity/} to BillingSource
+rename column ${/.*lineItem.UsageAccountId/} to LinkedAccountId
+rename column ${/.*lineItem.LineItemType/} to LineItemType
+rename column ${/.*lineItem.UsageStartDate/} to UsageStartDate
+rename column ${/.*lineItem.UsageEndDate/} to UsageEndDate
+rename column ${/.*lineItem.UsageType/} to UsageType
+rename column ${/.*lineItem.Operation/} to Operation
+rename column ${/.*lineItem.AvailabilityZone/} to AvailabilityZone
+rename column ${/.*lineItem.ResourceId/} to ResourceId
+rename column ${/.*lineItem.UsageAmount/} to UsageQuantity
+rename column ${/.*lineItem.NormalizedUsageAmount/} to NormalizedUsageAmount
+rename column ${/.*lineItem.CurrencyCode/} to CurrencyCode
+rename column ${/.*lineItem.BlendedRate/} to BlendedRate
+rename column ${/.*lineItem.BlendedCost/} to BlendedCost
+rename column ${/.*lineItem.UnblendedRate/} to UnblendedRate
+rename column ${/.*lineItem.UnblendedCost/} to UnblendedCost
+rename column ${/.*lineItem.LineItemDescription/} to LineItemDescription
+rename column ${/.*product.ProductName/} to ProductName
+rename column ${/.*product.operatingSystem/} to box_type
+rename column ${/.*product.usagetype/} to usagetype2
+rename column ${/.*pricing.term/} to ReservedInstance
+rename column ${/.*pricing.unit/} to unit
+if (@COLUMN_EXISTS(discount\/PrivateRateDiscount)) {
+  rename column ${/.*discount.PrivateRateDiscount/} to PrivateRateDiscount
+} else {
+  create column PrivateRateDiscount value 0
+}
+if (@COLUMN_EXISTS(discount\/SppDiscount)) {
+  rename column ${/.*discount.SppDiscount/} to SppDiscount
+} else {
+  create column SppDiscount value 0
+}
+if (@COLUMN_EXISTS(discount\/EdpDiscount)) {
+  rename column ${/.*discount.EdpDiscount/} to EdpDiscount
+} else {
+  create column EdpDiscount value 0
+}
+
+delete columns except PayerAccountId BillingSource LinkedAccountId LineItemType LineItemDescription ReservedInstance ResourceId unit UsageType box_type BlendedRate BlendedCost UnblendedRate UnblendedCost Operation AvailabilityZone ProductName UsageQuantity PrivateRateDiscount SppDiscount EdpDiscount UsageStartDate
+
+# Ensure that fees with a usage start date sometime in 'next month' are included in reports
+# Convert the UsageStartDate to yyyyMMdd format
+timestamp StartDate using UsageStartDate template "YYYY.MM.DD" format yyyymmdd
+where (["StartDate"] > "${dataYear}${dataMonth}${dataMonthDays}") {
+  set StartDate to "${dataYear}${dataMonth}01"
+}
+
+# Drop any records that do not have a UsageStartDate of the 1st of the month because,
+# based on the import filters combined with the logic above, they must be Fees with a
+# charge date later in this month
+where (["StartDate"] != "${dataDate}") {
+  delete rows
+}
+delete column StartDate
+
+# Remove taxes
+where (["LineItemType"] == "Tax") {
+  delete rows
+}
+
+# Convert Fees to usage
+where (["LineItemType"] == "Fee") {
+  set UsageQuantity to 1
+  set BlendedRate as BlendedCost
+  # No need to set UnblendedRate as it gets set later as part of PRD discount calculations
+  set ResourceId to "Fee"
+}
+
+where (["LineItemType"] == "BundledDiscount") {
+  set UsageQuantity to 1
+  # No need to set UnblendedRate as it gets set later as part of PRD discount calculations
+  set BlendedRate as BlendedCost
+  set ResourceId to "BundledDiscount"
+}
+
+# Handle credits. There may be multiple credits for the same day but with different
+# rates, so we need to normalise the data for credits such that all instances have the
+# same rate. We do this by summing the UnblendedCost using aggregation, and charging it
+# as a single unit of consumption.
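+# As an illustration (values are hypothetical): two credit rows for the same
+# LinkedAccountId with UnblendedCost -2.50 and -1.25 are collapsed below into a
+# single row with UnblendedCost -3.75, which is then charged as one unit of
+# consumption (UsageQuantity = 1), so the rate equals the total credit amount.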
+where (["LineItemType"] == "Credit") { + move rows to aws.credits +} + +if (!@DSET_EMPTY(aws.credits)) { + default dset aws.credits + aggregate notime LinkedAccountId match UnblendedCost sum# + delete column EXIVITY_AGGR_COUNT + set LineItemDescription to "AWS Credit" + set UsageType to "Credit" + set LineItemDescription to "Credit" + set ProductName to "Credit" + set ResourceId to "Credit" + set UsageQuantity to 1 + set BlendedRate as BlendedCost + default dset aws.cur + append aws.credits to aws.cur + delete dset aws.credits +} + +# Set rate and COGS +create column rate +create column adjusted_price +set adjusted_price = ([UnblendedCost] + [PrivateRateDiscount]) # PRD is negative +set rate = ([adjusted_price] / [UsageQuantity]) +delete column adjusted_price + +create column cogs # Don't really need COGS but setting them +set cogs as BlendedRate # for diagnostic/contract purposes only + +# Identify between Reserved and On demand instances (blanks = Reserved) +where ([ReservedInstance] == "") { + set ReservedInstance to "R" + } +where ([ReservedInstance] == "OnDemand") { + set ReservedInstance to "OD" +} + +# Set default values +option overwrite = no +set ResourceId to "Generic Consumption Record" +where ([unit] == "") { + set unit to "Units" +} + +# Create service key/name +replace " " in ProductName +create mergedcolumn service_key separator "|" from ReservedInstance UsageType box_type Operation AvailabilityZone ProductName +create mergedcolumn service_name separator " " from ProductName ReservedInstance UsageType box_type Operation AvailabilityZone + +# Remove records without costs +where ([UnblendedCost] == 0 || [UsageQuantity] == 0) { + delete rows +} + +option overwrite = yes +where (["LineItemType"] == "Fee") { + set UsageType to "Fees" + set ProductName to "Fees" + set unit to "Fees" +} + +# Copy Marketplace consumption out to a separate DSET +where (["BillingSource"] == "AWS Marketplace") { + set ProductName to "AWSMarketplace" # ProductName is service category (see AWS_consolidate.trs) +} +delete column BillingSource + +# Export enterprise support fees, if present +where (["LineItemType"] == "Fee" && ["LineItemDescription"] == "AWS Support (Enterprise)") { + copy rows to aws.enterpriseFees +} + +# Export the transformed data +export aws.cur as aws/${dataYear}/${dataMonth}/${dataDate}_aws.csv +terminate \ No newline at end of file diff --git a/AWS/2)_AWS_CUR_Consolidation.trs b/AWS/2)_AWS_CUR_Consolidation.trs new file mode 100644 index 0000000..02f051e --- /dev/null +++ b/AWS/2)_AWS_CUR_Consolidation.trs @@ -0,0 +1,61 @@ +option loglevel = INFO +##################################################################### +# +# Template Transformer for consolidating AWS CUR data +# ---------------------------------------------------- +# +# The following Transformers are required for loading AWS data: +# 1. AWS Pre-Processor Transformer +# 2. AWS Consolidation Transformer <---current Transformer +# +# This Transformer Template consoldiated pre-processed CUR data. +# In order to handle multiple payer accounts, additional changes +# might be required. Reach out to support@exivity.com for more details. 
+#
+#####################################################################
+
+import exported/aws/${dataYear}/${dataMonth}/${dataDate}_aws.csv source AWS alias consolidated options {
+  pattern = yes
+}
+
+create mergedcolumn tmp using string "C|" column service_key
+delete column service_key
+rename column tmp to service_key
+
+# Maintain a lookup file that will be used to map LinkedAccountIds to PayerAccountIds during EIB
+# export. This lookup file needs to contain all mappings seen in the month to date
+where ([PayerAccountId] != "" && [LinkedAccountId] != "") {
+  copy rows to payer.lookup
+}
+default dset payer.lookup
+delete columns except PayerAccountId LinkedAccountId
+
+if ("${dataDay}" != "01") {
+  # From the 2nd onwards, merge today's lookup data into the existing lookup file
+  import "exported/lookup/${dataYear}${dataMonth}_PayerAccountLookup.csv" source month alias lookup
+  append month.lookup to payer.lookup
+}
+
+aggregate notime PayerAccountId match LinkedAccountId match
+delete column EXIVITY_AGGR_COUNT
+export payer.lookup as lookup/${dataYear}${dataMonth}_PayerAccountLookup.csv
+
+default dset AWS.consolidated
+delete dset payer.lookup
+
+finish
+
+option services = readonly
+services {
+    effective_date = 20230101
+    service_type = automatic
+    description_col = service_name    # column name
+    category_col = ProductName        # column with category value
+    instance_col = ResourceId         # the unique instance i.e. vm-id, username, etc
+    usages_col = service_key          # the column containing the name of the consumed service
+    rate_col = rate                   # the column containing the rate values
+    cogs_col = cogs                   # the column containing the COGS rate values
+    interval = individually
+    unit_label_col = unit             # the column containing the unit label
+    consumption_col = UsageQuantity   # the column containing the consumed quantity
+}
\ No newline at end of file
diff --git a/AWS/AWS_CUR_Extractor_(Athena).use b/AWS/AWS_CUR_Extractor_(Athena).use
deleted file mode 100644
index d5bc2c3..0000000
--- a/AWS/AWS_CUR_Extractor_(Athena).use
+++ /dev/null
@@ -1,158 +0,0 @@
-
-######################################################################
-#
-# This is a Template Extractor for AWS CUR using Athena.
-#
-# This Extractor assumes that you have already configured Athena.
-# If you have not done this already, please follow the instructions
-# in the following tutorial:
-#
-# - https://docs.exivity.com/getting-started/tutorials/amazon-aws-stack
-#
-# When executing this script, it expects two arguments:
-# - FROM and TO date, in yyyyMMdd format
-#
-# NOTE: FROM and TO date should not span more than 1 month.
-#       If they do span two months, the FROM date is reset to the
-#       last day of the FROM month and the TO date to the first day
-#       of the TO month. Support for spanning multiple months will
-#       be included in a future release.
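-#
-# Example (dates are illustrative): to collect usage for 1 January 2023,
-# run this Extractor with the arguments 20230101 20230102.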
-#
-######################################################################
-
-# Set this to 1 to enable a debug trace output when the script is run
-var DEBUG = "0"
-# This is the text that appears to the left and right of debug headings
-var banner = "________"
-
-######################################################################
-# Customer specific values here (these can be encrypted if required) #
-#                                                                    #
-# The Athena End Point that holds the AWS CUR billing data
-public var API_EndPoint = "https://your.athena.endpoint.com/v1/QueryAthena2"
-# The Athena database name
-public var DBName = "aws_billing_report_dbname"
-# The table that holds your data
-public var TableName = "my_cur_report"
-# Provide the AWS S3 bucket where Athena writes its results, so the Extractor can download it
-public var bucket = "s3://YourS3Bucket"
-# The API key that the Extractor will use to access the bucket
-public var API_Key = "YourAppKey"
-#                                                                    #
-# End customer specific values                                       #
-######################################################################
-
-
-# Check if we have two parameters (from and to date)
-if (${ARGC} != 2)
-{
-  print "This requires 2 arguments: the day to collect usage for, and the date following that day, both in yyyyMMdd format"
-  terminate
-} else {
-  var today = ${ARG_1}
-  var tomorrow = ${ARG_2}
-}
-
-# Validate the date formats
-match date "^([0-9]{8})$" ${today}
-if (${date.STATUS} != MATCH) {
-  print Argument 1 error: ${today} is not in yyyyMMdd format
-  terminate with error
-}
-
-match date "^([0-9]{8})$" ${tomorrow}
-if (${date.STATUS} != MATCH) {
-  print Argument 2 error: ${tomorrow} is not in yyyyMMdd format
-  terminate with error
-}
-
-# Extract the day, month and year fields from the dates
-match day "^[0-9]{6}([0-9]{2})" ${today}
-if (${day.STATUS} != MATCH) {
-  print Could not extract day from the supplied date (${today})
-  terminate
-} else {
-  var day = ${day.RESULT}
-}
-
-match month "^[0-9]{4}([0-9]{2})[0-9]{2}" ${today}
-if (${month.STATUS} != MATCH) {
-  print Could not extract month from the supplied date (${today})
-  terminate
-} else {
-  var month = ${month.RESULT}
-}
-
-match year "^([0-9]{4})[0-9]{4}" ${today}
-if (${year.STATUS} != MATCH) {
-  print Could not extract year from the supplied date (${today})
-  terminate
-} else {
-  var year = ${year.RESULT}
-}
-
-match day "^[0-9]{6}([0-9]{2})" ${tomorrow}
-if (${day.STATUS} != MATCH) {
-  print Could not extract day from the supplied date (${tomorrow})
-  terminate
-} else {
-  var tomorrow_day = ${day.RESULT}
-}
-
-match month "^[0-9]{4}([0-9]{2})[0-9]{2}" ${tomorrow}
-if (${month.STATUS} != MATCH) {
-  print Could not extract month from the supplied date (${tomorrow})
-  terminate
-} else {
-  var tomorrow_month = ${month.RESULT}
-}
-
-match year "^([0-9]{4})[0-9]{4}" ${tomorrow}
-if (${year.STATUS} != MATCH) {
-  print Could not extract year from the supplied date (${tomorrow})
-  terminate
-} else {
-  var tomorrow_year = ${year.RESULT}
-}
-
-# Set some variables to facilitate running for
-# multiple days within a single month.
-var date_diff = ${tomorrow_day}
-var date_diff -= ${day}
-var next_day = ${day}
-
-if (${month} != ${tomorrow_month}) {
-  print "WARNING! spanning two months not available for > 1 day."
-  print "Resetting day variable to last day of the FROM month ..."
-  get_last_day_of ${year}${month} as day
-  print "Resetting tomorrow_day variable to first day of the TO month ..."
-  var tomorrow_day = 01
-  var date_diff = 1
-}
-# URL-encode the bucket variable so it can safely be used in a query string.
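-# (for example, "s3://YourS3Bucket" becomes "s3%3A%2F%2FYourS3Bucket")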
-uri encode-component bucket
-
-var date_diff += 1
-
-loop date_range ${date_diff} {
-
-  # Create the query for the day being processed.
-  var query = "SELECT bill_InvoiceId, bill_PayerAccountId, lineItem_UsageAccountId, lineItem_LineItemType, lineItem_UsageStartDate, lineItem_UsageEndDate, lineItem_UsageType, lineItem_Operation, lineItem_AvailabilityZone, lineItem_ResourceId, lineItem_UsageAmount, lineItem_NormalizedUsageAmount, lineItem_CurrencyCode, lineItem_BlendedRate, lineItem_BlendedCost, lineItem_LineItemDescription, product_ProductName, product_operatingSystem, product_usagetype, pricing_term, pricing_unit FROM ${DBName}.${TableName} where lineItem_UsageStartDate LIKE '${year}-${month}-${next_day}%';"
-  print ${query}
-  # URL-encode the query variable.
-  uri encode-component query
-
-  # The first API call obtains the JSON containing the URL of the query results.
-  print "${API_EndPoint}?query=${query}&s3output=${bucket}"
-  clear http_headers
-  set http_header "x-api-key: ${API_Key}"
-  buffer API = http GET "${API_EndPoint}?query=${query}&s3output=${bucket}"
-  # Download the report from the URL obtained above and save it locally.
-  set http_savefile "./extracted/AWS_CUR/${year}${month}${next_day}CUR.csv"
-  http GET $JSON{API}.[url]
-
-  print "This is date_range loop number: ${date_range.COUNT}"
-  var next_day += 1
-  print ${next_day}
-}
\ No newline at end of file
diff --git a/AWS/AWS_CUR_Transformer.trs b/AWS/AWS_CUR_Transformer.trs
deleted file mode 100644
index fb78c38..0000000
--- a/AWS/AWS_CUR_Transformer.trs
+++ /dev/null
@@ -1,223 +0,0 @@
-option loglevel = INFO
-# ===================================================================
-#
-# Example Transformer for AWS CUR for S3 based Extractions
-#
-# ===================================================================
-# Import Current Month of CUR Files
-#
-# The list of fields in the include option matches the required fields
-# defined in the data definition document.
-#
-import system/extracted/aws/${dataYear}${dataMonth}/.*-[0-9]*\.csv source AWS_CUR alias usage1 options {
-  pattern on  # Change the value depending on your report name
-  filter = (["lineItem/UsageStartDate"] =~ /${dataYear}-${dataMonth}-${dataDay}.*/)
-  include bill/BillingEntity lineItem/UsageAccountId lineItem/LineItemType lineItem/UsageStartDate lineItem/UsageEndDate lineItem/ProductCode lineItem/UsageType lineItem/Operation lineItem/AvailabilityZone lineItem/ResourceId lineItem/UsageAmount lineItem/UnblendedRate lineItem/UnblendedCost lineItem/BlendedCost product/ProductName product/operatingSystem product/instanceType product/location pricing/term pricing/unit reservation/AmortizedUpfrontCostForUsage reservation/AvailabilityZone reservation/EffectiveCost reservation/UnusedQuantity reservation/UnusedRecurringFee
-}
-
-# Rename system-generated names to user-friendly names for display on reports
-rename column "lineItem/UsageAccountId" to LinkedAccountId
-rename column pricing/term to "Pricing Type"
-rename column product/operatingSystem to "OS_Type"
-rename column lineItem/ResourceId to ResourceId
-
-# Fix data in Pricing Type: OnDemand becomes On Demand
-replace "OnDemand" in "Pricing Type" with "On Demand"
-
-# ====================================================================
-# Uncomment this part to correlate LinkedAccountId with Customer Names
-# Set linked_accounts to the real customer name.
-# Import the Customer Lookup table.
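-# A minimal linked_accounts.csv might look like this (values are examples):
-#   LinkedAccountId,LinkedAccountName
-#   123456789012,Acme Corp
-#   210987654321,Globex Inc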
-
-###
-# import "import/lookup/linked_accounts.csv" source aws alias linked_accounts
-# correlate aws.linked_accounts.LinkedAccountName using LinkedAccountId
-# rename column LinkedAccountName to Customer
-###
-
-# Remove irrelevant snapshot and workspace resource details to preserve the cloud source resource name
-option overwrite = no
-set ResourceId to "Generic Resource/"
-split ResourceId using "/"
-delete columns ResourceId ResourceId_split2 ResourceId_split3 ResourceId_split4
-rename column ResourceId_split1 to CS_ResourceId
-
-# Set system variables for Rate, COGS and Units of measure
-# NOTE: the rate for Reserved Instances is not supplied from source so we must calculate it
-rename column pricing/unit to Units
-# Put "Units" into data where there is no value
-set Units to "Units"
-# For PartialUpfrontCost we must also include reservation/AmortizedUpfrontCostForUsage to get the correct cs_rate,
-# that is (reservation/EffectiveCost - reservation/AmortizedUpfrontCostForUsage) / lineItem/UsageAmount
-# The RDS calculation is believed to be correct, but it has not been verified yet due to a lack of test data
-create column cs_cogs value 0
-# Need to turn on overwrite because the column is already set to 0
-option overwrite = yes
-create column cs_rate
-calculate column cs_rate as column "lineItem/BlendedCost" / column "lineItem/UsageAmount"
-
-if (!@COLUMN_EXISTS("reservation/AvailabilityZone")) {
-  create column "reservation/AvailabilityZone"
-  set "reservation/AvailabilityZone" as "lineItem/AvailabilityZone"
-}
-
-# Modification May 19
-
-where ((["Pricing Type"] == "") && (["lineItem/LineItemType"] != "RIFee")) {
-  calculate column cs_rate as column "lineItem/UnblendedCost" / column "lineItem/UsageAmount"
-  set "Pricing Type" to "On Demand"
-}
-
-# End of Modification
-where ((["Pricing Type"] == "Reserved") && (["product/ProductName"] == "Amazon Elastic Compute Cloud")) {
-  move rows to EXCEPTIONS.new
-}
-
-# Environment Improvement Suggestions
-where ((["lineItem/LineItemType"] == "DiscountedUsage") && (["reservation/EffectiveCost"] > "0") && (["product/ProductName"] == "Amazon Elastic Compute Cloud")) {
-  # calculate column "EC-AUFCFU" as column "reservation/EffectiveCost" - column reservation/AmortizedUpfrontCostForUsage
-  # calculate column ResRate as column "EC-AUFCFU" / column lineItem/UsageAmount
-  # set cs_rate as ResRate
-  move rows to EXCEPTIONS.new
-}
-where ((["Pricing Type"] == "Reserved") && (["product/ProductName"] == "Amazon Relational Database Service")) {
-  move rows to EXCEPTIONS.new
-}
-option overwrite = no
-# Change the ProductName for Amazon Elastic Compute into multiple categories: Virtual Machines, EBS Storage, Snapshots, ElasticIP, LoadBalancer
-# and Other Services to help identify these blocks of EC2 Services.
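-# For example (UsageType values are illustrative), a UsageType of
-# "BoxUsage:t3.large" would be recategorised as "Amazon EC2 VM Services",
-# while "EBS:VolumeUsage.gp2" would become "Amazon EC2 EBS Storage".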
-create mergedcolumn ProdName from product/ProductName
-option overwrite = yes
-where (["lineItem/UsageType"] =~ /.*(BoxUsage).*/) {
-  set ProdName to "Amazon EC2 VM Services"
-}
-where ((["lineItem/UsageType"] =~ /.*(EBS:Volume).*/) || (["lineItem/UsageType"] =~ /.*(EBSOptimized).*/)) {
-  set ProdName to "Amazon EC2 EBS Storage"
-}
-where (["lineItem/UsageType"] =~ /.*(SnapshotUsage).*/) {
-  set ProdName to "Amazon EC2 EBS Snapshot"
-}
-where (["lineItem/UsageType"] =~ /.*(ElasticIP).*/) {
-  set ProdName to "Amazon EC2 ElasticIP"
-}
-where (["lineItem/UsageType"] =~ /.*(LoadBalancer).*/) {
-  set ProdName to "Amazon EC2 LoadBalancer"
-}
-where ((["ProdName"] == "Amazon Elastic Compute Cloud") && (["lineItem/LineItemType"] != "RIFee")) {
-  # Environment Improvement Suggestions
-  replace "Amazon Elastic Compute Cloud" in ProdName with "Amazon EC2 Other Services"
-}
-# If the service is not part of Amazon EC2 then we need to switch the Product Name back to its original Product Name.
-# Example - a DynamoDB server should be in the AWS DynamoDB group.
-where ((["ProdName"] == "Amazon EC2 VM Services") && (["product/ProductName"] != "Amazon Elastic Compute Cloud")) {
-  set ProdName as product/ProductName
-}
-# Move ProdName back to ProductName and delete the working column ProdName
-set product/ProductName as ProdName
-delete column ProdName
-# If the UnblendedCost is less than 0 and the UnblendedRate is blank, then we have an issued Credit.
-# Build a credit record by setting the Rate to 1, and UsageAmount to the credit.
-# Add metadata tag value for Business Unit, Landscape, Application and ResourceId.
-create column Credit value "Credit"
-option overwrite = yes
-where (["lineItem/LineItemType"] == "Credit") {
-  create mergedcolumn temp_landscape separator " " from Credit product/ProductName
-  create mergedcolumn temp_application separator " " from Credit lineItem/UsageType
-  set CS_ResourceId to "Credit - Not Named"
-  set lineItem/AvailabilityZone as product/location
-  set lineItem/UsageAmount as "lineItem/UnblendedCost"
-  set cs_rate to 1
-}
-delete columns Credit temp_landscape temp_application
-# Capture UnusedQuantity for Reserved Instances that are 'RIFee' and not ProductName "Amazon DynamoDB".
-# This is only done as part of the End of Month processing. At this time we only know of Reservations
-# for EC2 and RDS instances; each additional product type will need its own handling.
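-# For example, an EC2 'RIFee' row would surface on the report as
-# "Under Utilized Reserve - Not Named" under "Amazon EC2 VM Services".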
-create column ResZone value "Reservation Zone"
-where ((["lineItem/LineItemType"] == "RIFee") && (["product/ProductName"] == "Amazon Elastic Compute Cloud")) {
-  # set lineItem/UsageAmount as reservation/UnusedQuantity
-  set "lineItem/AvailabilityZone" as reservation/AvailabilityZone
-  create mergedcolumn temp_resAZ separator " " from ResZone lineItem/AvailabilityZone
-  set CS_ResourceId to "Under Utilized Reserve - Not Named"
-  replace "Amazon Elastic Compute Cloud" in product/ProductName with "Amazon EC2 VM Services"
-}
-# Environment Improvement Suggestions
-where ((["lineItem/LineItemType"] == "RIFee") && (["product/ProductName"] == "Amazon Relational Database Service")) {
-  # set lineItem/UsageAmount as reservation/UnusedQuantity
-  set lineItem/AvailabilityZone as reservation/AvailabilityZone
-  create mergedcolumn temp_RDSresAZ separator " " from ResZone lineItem/AvailabilityZone
-  set CS_ResourceId to "Under Utilized Reserve - Not Named"
-}
-delete columns ResZone temp_resAZ temp_RDSresAZ
-option overwrite = no
-# service_opp will be used in the key value and better describes the service
-create column service_opp
-where (["lineItem/AvailabilityZone"] == "") {
-  set service_opp as lineItem/Operation
-  replace "None" in service_opp
-  replace "Unknown" in service_opp
-  replace "EBS:" in service_opp
-  replace "Not Applicable" in service_opp
-}
-
-# Create service names and keys
-create mergedcolumn service_name separator " " from lineItem/UsageType OS_Type lineItem/AvailabilityZone service_opp
-replace "HeavyUsage" in service_name with "Reserved"
-replace "Heavy Utilization" in service_name with "Reserved"
-create mergedcolumn service_key separator "-" from "Pricing Type" lineItem/ProductCode lineItem/UsageType OS_Type lineItem/Operation lineItem/AvailabilityZone
-
-# If the usage record is for an AWS Marketplace resource, then modify the service_name and ProductName so that the descriptions match the display
-# formats used by non-Marketplace resources.
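-# For example, a Marketplace row with a (hypothetical) product/ProductName of
-# "Some SaaS Product" gets its service_name rebuilt from that product name and
-# its ProductName set to "AWS Marketplace", so Marketplace items are grouped
-# together.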
-option overwrite = yes
-where (["bill/BillingEntity"] == "AWS Marketplace") {
-  create mergedcolumn mktplace_service_name separator " " from product/ProductName lineItem/UsageType OS_Type lineItem/AvailabilityZone service_opp
-  replace " SoftwareUsage" in mktplace_service_name
-  set service_name as mktplace_service_name
-  # Environment Improvement Suggestions
-  set product/ProductName as bill/BillingEntity
-}
-delete column mktplace_service_name
-
-# Fill blank values in service_name
-option overwrite = no
-create mergedcolumn service_name_blank separator " " from product/ProductName lineItem/LineItemType
-set service_name as service_name_blank
-set CS_ResourceId as service_name_blank
-delete column service_name_blank
-
-# Aggregate the AWS_CUR.usage1 DSET to combine like records into a single entry and sum the usage quantity
-aggregate AWS_CUR.usage1 notime LinkedAccountId match service_key match cs_rate match CS_ResourceId match lineItem/UsageAmount sum
-rename column OS_Type to "Operating System"
-rename column lineItem/UsageAmount to "UsageQuantity"
-rename column lineItem/AvailabilityZone to "CS_Location"
-rename column LinkedAccountId to "CS_AccountID"
-rename column lineItem/UsageStartDate to "CS_StartDate"
-rename column lineItem/UsageType to "UsageType"
-rename column product/ProductName to "Product Group"
-rename column bill/BillingEntity to "Billing Entity"
-rename column lineItem/LineItemType to "Line Item Type"
-create column interval value individually
-
-# Export EXCEPTIONS and a copy of the data load set to .csv files on the local server
-# export EXCEPTIONS.001_records_no_fee_or_rate as "EXCEPTIONS\001_records_no_fee_or_rate_CUR_EOM_${dataDate}.csv"
-# Delete columns that we do not want to load into the database
-delete columns service_opp KeyExtention App Service Bundl BU RecID Id_Program lineItem/Operation lineItem/ProductCode lineItem/UnblendedRate lineItem/UnblendedCost product/instanceType product/location reservation/AvailabilityZone reservation/UnusedQuantity EXIVITY_AGGR_COUNT
-delete columns reservation/AmortizedUpfrontCostForUsage reservation/EffectiveCost reservation/UnusedRecurringFee
-export AWS_CUR.usage1 as "aws/CUR_with_etl_dataout_EOM_${dataDate}.csv"
-default dset AWS_CUR.usage1
-export EXCEPTIONS.new as Exceptions_${dataDate}.csv
-finish
-
-#option services = overwrite
-
-services {
-    effective_date = 20190101
-    service_type = automatic
-    description_col = service_name    # column name
-    category_col = "Product Group"    # column with category value
-    instance_col = CS_ResourceId      # the unique instance i.e. vm-id, username, etc
-    usages_col = service_key          # the column containing the name of the consumed service
-    rate_col = cs_rate                # the column containing the rate values
-    cogs_col = cs_cogs                # the column containing the COGS rate values
-    interval_col = interval           # the column containing the interval (i.e. individually)
-    unit_label_col = Units            # the column containing the unit label
-    consumption_col = UsageQuantity   # the column containing the consumed quantity
-}
diff --git a/AWS/AWS_CUR_Transformer_(simplified).trs b/AWS/AWS_CUR_Transformer_(simplified).trs
deleted file mode 100644
index 3d4c9ed..0000000
--- a/AWS/AWS_CUR_Transformer_(simplified).trs
+++ /dev/null
@@ -1,98 +0,0 @@
-option loglevel = INFO
-#####################################################################
-#
-# Example Transformer for AWS CUR for S3 based Extractions
-#
-#####################################################################
-# Import Current Month of CUR Files
-#
-import "system/extracted/aws/${dataYear}${dataMonth}/.*[0-9]*\.csv" source aws alias cur options {
-  pattern on
-  filter = ([lineItem\/UsageStartDate] =~ /${dataYear}-${dataMonth}-${dataDay}.*/)
-}
-
-# Normalise the column names
-rename column ${/.*bill.PayerAccountId/} to PayerAccountId
-rename column ${/.*lineItem.UsageAccountId/} to LinkedAccountId
-rename column ${/.*lineItem.LineItemType/} to LineItemType
-rename column ${/.*lineItem.UsageStartDate/} to UsageStartDate
-rename column ${/.*lineItem.UsageEndDate/} to UsageEndDate
-rename column ${/.*lineItem.UsageType/} to UsageType
-rename column ${/.*lineItem.Operation/} to Operation
-rename column ${/.*lineItem.AvailabilityZone/} to AvailabilityZone
-rename column ${/.*lineItem.ResourceId/} to ResourceId
-rename column ${/.*lineItem.UsageAmount/} to UsageQuantity
-rename column ${/.*lineItem.NormalizedUsageAmount/} to NormalizedUsageAmount
-rename column ${/.*lineItem.CurrencyCode/} to CurrencyCode
-rename column ${/.*lineItem.BlendedRate/} to BlendedRate
-rename column ${/.*lineItem.BlendedCost/} to BlendedCost
-rename column ${/.*lineItem.UnblendedRate/} to UnblendedRate
-rename column ${/.*lineItem.UnblendedCost/} to UnblendedCost
-rename column ${/.*lineItem.LineItemDescription/} to LineItemDescription
-rename column ${/.*product.ProductName/} to ProductName
-rename column ${/.*product.operatingSystem/} to box_type
-rename column ${/.*product.usagetype/} to usagetype2
-rename column ${/.*pricing.term/} to ReservedInstance
-rename column ${/.*pricing.unit/} to unit
-
-# NOTE:
-# =====
-# Make sure to provide a manual lookup file using the Lookup Manager.
-# The file should look like the following example:
-# -----------------------------
-# | LinkedAccountId,Customer   |
-# | 123456789,My Company Name  |
-# | 0987654321,Some Other Corp |
-# -----------------------------
-#
-#import "import/lookup/aws_customer_lookup.csv" source aws alias customers
-#correlate Customer using LinkedAccountId assuming aws.customers
-
-
-# Differentiate between Reserved and On-Demand instances (blanks = Reserved)
-where ([ReservedInstance] == "") {
-  set ReservedInstance to "Reserved"
-}
-
-# Set standard values
-option overwrite = no
-set ResourceId to "Generic Consumption Record"
-# create column interval value individually
-
-# Set a generic unit value for blank cells
-where ([unit] == "") {
-  set unit to "Units"
-}
-create mergedcolumn service_key separator " | " from ReservedInstance UsageType box_type Operation AvailabilityZone ProductName
-create mergedcolumn service_name separator " " from ProductName ReservedInstance UsageType box_type Operation AvailabilityZone
-# Remove records without costs
-# This can be removed if you're interested in consumption rather than costs
-where ( [BlendedRate] == 0 ) {
-  delete rows
-}
-
-# Default rate
-create column rate
-set rate = [UnblendedRate]*1
-# Default cogs
-create column cogs
-set cogs = [UnblendedRate]*1
-
-# Aggregate dataset
-aggregate notime LinkedAccountId match service_key match rate match ResourceId match UsageQuantity sum
-
-finish
-
-services {
-    effective_date = 20190101
-    service_type = automatic
-    description_col = service_key     # column name
-    category_col = ProductName        # column with category value
-    instance_col = ResourceId         # the unique instance i.e. vm-id, username, etc
-    usages_col = service_name         # the column containing the name of the consumed service
-    rate_col = rate                   # the column containing the rate values
-    cogs_col = cogs                   # the column containing the COGS rate values
-    interval = individually
-    unit_label_col = unit             # the column containing the unit label
-    consumption_col = UsageQuantity   # the column containing the consumed quantity
-}
\ No newline at end of file
diff --git a/AWS/AWS_DBR_S3_Extractor.use b/AWS/AWS_DBR_S3_Extractor.use
deleted file mode 100644
index 7c532ab..0000000
--- a/AWS/AWS_DBR_S3_Extractor.use
+++ /dev/null
@@ -1,224 +0,0 @@
-#################################################################
-# This USE script will download a file from an S3 bucket       #
-#                                                               #
-# This needs one parameter: YYYYMMDD                            #
-#---------------------------------------------------------------#
-# NOTES:                                                        #
-# - This script defaults the Region to us-east-1 but this       #
-#   can easily be changed or made a parameter as required       #
-#################################################################

-# Set this to 1 to enable a debug trace output when the script is run
-var DEBUG = "0"
-
-# This is the text that appears to the left and right of debug headings
-var banner = "________"
-
-######################################################################
-# Customer specific values here (these can be encrypted if required) #
-#                                                                    #
-public var bucket = ""
-public var LinkedAccountId = ""
-public var AWS_Region = "us-east-1"   # make sure to provide the correct region
-public var AWS_Service = "s3"
-public var access_key = ""
-public var secret_key = ""
-
-#                                                                    #
-# End customer specific values                                       #
-######################################################################
-
-# This is the SHA256 hash of an empty string (required if making a request with no body)
-var hashed_empty_string = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" - -# -# compile the filename using the date -# -var dataDate = "${ARG_1}" -# Obtain the month from the Date -match dataMonth "^[0-9]{4}([0-9]{2})[0-9]{2}" ${dataDate} -if (${dataMonth.STATUS} != MATCH) { - print Could not extract month from the supplied date (${dataDate}) - terminate -} else { - var dataMonth = ${dataMonth.RESULT} -} - -match dataYear "^([0-9]{4})[0-9]{4}" ${dataDate} -if (${dataYear.STATUS} != MATCH) { - print Could not extract year from the supplied date (${dataDate}) - terminate -} else { - var dataYear = ${dataYear.RESULT} -} - -var s3_object = "${LinkedAccountId}-aws-billing-detailed-line-items-with-resources-and-tags-${dataYear}-${dataMonth}.csv.zip" -var save_file = "${s3_object}" - -######################################################################################### -# SETUP # -# Create a number of variables to represent the various components that the steps # -# below are going to use in order to construct a correct AWS request # -#---------------------------------------------------------------------------------------# -# This is the request syntax for retrieving an object from a bucket: # -# GET / HTTP/1.1 # -# Host: .s3.amazonaws.com # -# Date: date # -# Authorization: authorization string # -######################################################################################### - -var HTTP_Method = "GET" -var URI = "${s3_object}" - var query_params -# Must have an empty variable for 'no query parameters' = "" -#var host = "${bucket}.s3-${AWS_Region}.amazonaws.com" -var host = "${bucket}.s3.amazonaws.com" -var date = "${OSI_TIME_UTC}" - -# Initialise config variables specific to this script -var save_path = "system/extracted/aws" - - - -######################################################################################### -# STEP 1 # -# Create a canonical request as documented at # -# at https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html # -######################################################################################### - -# 1a) Canonical Headers string -# - This is part of the Canonical Request string which will be generated below. 
-# - The Canonical Headers are a list of all HTTP headers (including values but
-#   with the header names in lowercase) separated by newline characters and in
-#   alphabetical order
-
-var canonical_headers = "date:${date}${NEWLINE}host:${host}${NEWLINE}x-amz-content-sha256:${hashed_empty_string}${NEWLINE}"
-if (${DEBUG} == 1) {
-  print ${NEWLINE}${banner} Canonical Headers ${banner}${NEWLINE}${canonical_headers}
-}
-
-# 1b) Signed Headers string
-# - This is a list of the header names that were used to create the Canonical Headers,
-#   separated by a semicolon
-# - This list MUST be in alphabetical order
-# - NOTE: There is no trailing newline on this variable (we need to use it both with and without
-#   a newline later so we explicitly add a ${NEWLINE} when we need to)
-
-var signed_headers = "date;host;x-amz-content-sha256"
-if (${DEBUG} == 1) {
-  print ${banner} Signed Headers ${banner}${NEWLINE}${signed_headers}${NEWLINE}
-}
-
-# 1c) Canonical Request
-# - The above are now combined to form a Canonical Request, which is created as follows:
-# - HTTPRequestMethod + '\n' + URI + '\n' + QueryString + '\n' + CanonicalHeaders + '\n' +
-#   SignedHeaders + '\n' + Base16 encoded SHA256 Hash of any body content
-# - Note that the Canonical Headers are followed by an extra newline (they have one already)
-
-var canonical_request = "${HTTP_Method}${NEWLINE}/${URI}${NEWLINE}${query_params}${NEWLINE}${canonical_headers}${NEWLINE}${signed_headers}${NEWLINE}${hashed_empty_string}"
-if (${DEBUG} == 1) {
-  print ${banner} Canonical Request ${banner}${NEWLINE}${canonical_request}${NEWLINE}
-}
-
-# 1d) Hash of the Canonical Request
-# - This is an SHA256 hash of the Canonical Request string
-
-hash sha256 canonical_request as hashed_canonical_request
-
-######################################################################################
-# STEP 2                                                                             #
-# Create a 'string to sign' as documented                                            #
-# at https://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html  #
-#------------------------------------------------------------------------------------#
-# In a nutshell this is the following components separated by newlines:              #
-# 2a) Hash algorithm designation                                                     #
-# 2b) UTC date in YYYYMMDD'T'HHMMSS'Z' format                                        #
-# 2c) credential scope (date/region/service/"aws4_request")                          #
-# 2d) base16-encoded hashed canonical request                                        #
-######################################################################################
-
-# Extract the yyyyMMdd from the UTC time
-match yyyyMMdd "(.{8})" ${date}
-var yyyyMMdd = "${yyyyMMdd.RESULT}"
-
-var string_to_sign = "AWS4-HMAC-SHA256${NEWLINE}${date}${NEWLINE}${yyyyMMdd}/${AWS_Region}/${AWS_Service}/aws4_request${NEWLINE}${hashed_canonical_request}"
-# var string_to_sign = "AWS4-HMAC-SHA256${NEWLINE}${date}${NEWLINE}${yyyyMMdd}/${AWS_Service}/aws4_request${NEWLINE}${hashed_canonical_request}"
-
-if (${DEBUG} == 1) {
-  print ${banner} String to sign ${banner}${NEWLINE}${string_to_sign}${NEWLINE}
-}
-
-######################################################################################
-# STEP 3                                                                             #
-# Calculate the signature for AWS Signature Version 4 as documented                  #
-# at https://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html    #
-######################################################################################
-
-# 3a) Derive a signing key and apply it to the string to sign
-#     Use the secret access key to create the following hash-based auth codes:
-#     a) kSecret (our secret access key)
-#     b) kDate = HMAC("AWS4" + kSecret, Date)    NOTE: yyyyMMdd only
-#     c) kRegion = HMAC(kDate, Region)
-#     d) kService = HMAC(kRegion, Service)
-#     e) kSigning = HMAC(kService, "aws4_request")
-#     f) HMAC the string_to_sign with the key derived using steps a - e
-
-var signature = "${string_to_sign}"
-
-if (${DEBUG} == 1) {
-  print ${banner}Deriving Signing Key using these parameters${banner}${NEWLINE}${secret_key} ${yyyyMMdd} ${AWS_Region} ${AWS_Service}${NEWLINE}${NEWLINE}
-}
-
-# The following statement takes care of all the details listed above
-# Notes:
-# - The word 'signature' in the statement below is the NAME of a variable and
-#   NOT a reference to its contents
-# - The contents of this variable are the string to sign, and after the statement
-#   has completed these contents will have been modified to be the authorization
-#   signature for that string
-#
-AWS_sign_string signature using ${secret_key} ${yyyyMMdd} ${AWS_Region} ${AWS_Service}
-
-######################################################################################
-# STEP 4                                                                             #
-# Add the signing information to the request as documented at:                       #
-# https://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html  #
-######################################################################################
-
-var credential_scope = "${yyyyMMdd}/${AWS_Region}/${AWS_Service}/aws4_request"
-
-if (${DEBUG} == 1) {
-  print ${banner} Credential Scope ${banner}${NEWLINE}${credential_scope}${NEWLINE}${NEWLINE}
-}
-
-var auth_header = "Authorization: AWS4-HMAC-SHA256 Credential=${access_key}/${credential_scope}, SignedHeaders=${signed_headers}, Signature=${signature}"
-if (${DEBUG} == 1) {
-  print ${banner} Authorization Header ${banner}${NEWLINE}${auth_header}${NEWLINE}
-}
-set http_header ${auth_header}
-
-#######################################################
-# STEP 5                                              #
-# Execute the query                                   #
-#-----------------------------------------------------#
-# Note that all the headers that were included in the #
-# signed_headers created in STEP 1 must be set before #
-# the request is executed                             #
-#######################################################
-
-set http_header "Date: ${date}"
-set http_header "x-amz-content-sha256: ${hashed_empty_string}"
-set http_savefile ${save_path}/${save_file}
-
-set http_progress yes
-print "Downloading ${host}/${URI}:"
-http GET https://${host}/${URI}
-
-buffer billing = FILE ${save_path}/${save_file}
-unzip {billing}
-save {billing} as ${save_path}/${save_file}.csv
-discard {billing}
-
-print ${NEWLINE}Done
diff --git a/AWS/AWS_DBR_Transformer.trs b/AWS/AWS_DBR_Transformer.trs
deleted file mode 100644
index 0aea016..0000000
--- a/AWS/AWS_DBR_Transformer.trs
+++ /dev/null
@@ -1,80 +0,0 @@
-#####################################################################
-#
-# Example Transformer for AWS Detailed Billing Report line items
-#
-# Make sure to provide the correct LinkedAccountId with the import
-#
-#####################################################################
-
-option loglevel = DEBUGX
-# Import the detailed billing report CSV (extracted by the DBR S3 Extractor)
-import "system/extracted/aws/000000000000-aws-billing-detailed-line-items-with-resources-and-tags-${dataYear}-${dataMonth}.csv.zip.csv" source aws alias consumption
-
-# Set timestamp
-var template = YYYY.MM.DD
-timestamp START_DATE using UsageStartDate template ${template} format yyyymmdd
-timestamp END_DATE using UsageEndDate template ${template} format yyyymmdd
-timecolumns START_DATE END_DATE
-
-# Delete rows that are not "today"
-where ( [START_DATE] != ${dataDate} ) {
-  delete rows
-}
-
-# Change Reserved Instance Y or N to Reserved or On Demand
-where ([ReservedInstance] == "N") {
-  set ReservedInstance to "On Demand"
-}
-where ([ReservedInstance] == "Y") {
-  set ReservedInstance to "Reserved"
-}
-
-# Set standard values
-option overwrite = NO
-set ResourceId to "Generic Consumption Record"
-create column interval value individually
-create column Units value "Units"
-
-# Create service names and keys
-# Get the box type for non-small box type services from the item description
-create mergedcolumn box_type from ItemDescription /.*On Demand (.*) [a-z][0-9]\..*/
-
-create column service_opp
-where ( [AvailabilityZone] == :EMPTY: ) {
-  set service_opp as Operation
-  replace "None" in service_opp
-  replace "Unknown" in service_opp
-  replace "EBS:" in service_opp
-  replace "Not Applicable" in service_opp
-}
-
-create mergedcolumn service_name separator " " from ReservedInstance UsageType box_type AvailabilityZone service_opp
-replace "On Demand " in service_name
-replace " HeavyUsage" in service_name
-create mergedcolumn service_key separator " - " from ReservedInstance UsageType box_type Operation AvailabilityZone RateId
-
-delete columns service_opp
-
-# Remove records without costs
-# This can be removed if you're interested in consumption rather than costs
-where ( [Rate] == 0 ) {
-  delete rows
-}
-
-export aws.consumption as "${dataDate}_aws_consumption_export.csv"
-
-finish
-
-services {
-    effective_date = 20180101
-    service_type = automatic
-    description_col = service_name    # column name
-    category_col = ProductName        # column with category value
-    instance_col = ResourceId         # the unique instance i.e. vm-id, username, etc
-    usages_col = service_key          # the column containing the name of the consumed service
-    rate_col = Rate                   # the column containing the rate values
-    # cogs_col = BlendedRate          # the column containing the COGS rate values
-    interval_col = interval           # the column containing the interval (i.e. individually)
-    unit_label_col = Units            # the column containing the unit label
-    consumption_col = UsageQuantity   # the column containing the consumed quantity
-}
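-# For reference (values are illustrative): with the settings above, a row with
-# service_key "Reserved - BoxUsage - Windows - RunInstances - us-east-1a - 123"
-# and UsageQuantity 24 is charged as 24 units at that row's Rate.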