diff options
-rw-r--r-- | .vscode/settings.json | 3 | ||||
-rw-r--r-- | README.md | 12 | ||||
-rw-r--r-- | cli.ts | 20 | ||||
-rw-r--r-- | countries.ts | 497 | ||||
-rw-r--r-- | lib.ts | 243 |
5 files changed, 775 insertions, 0 deletions
diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..035012d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "deno.enable": false +}
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..8e5cfa1 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# uic-parse + +A rather untested, sleep-deprived UIC parsing project for the sake of no fucking clue why, honestly. + +Allows encoding and decoding from strings, as well as from/to JSON - in a lossless\* way. Attempts to do country discovery. Can validate or backfill check digits. + +\*for strings, assuming we can identify countries correctly + + +In library mode, it attempts to be a bit resilient by default and "fix" what it can - but (excluding throwing away garbage characters) it can be configured with options to turn those fixes into stricter errors instead. + +In CLI mode, all of those checks are enabled by default; they can be disabled (for now only all at once, without fine-grained control) with `--validate=false`. @@ -0,0 +1,20 @@ +import { parseArgs } from "jsr:@std/cli/parse-args"; +import UICVehicle from './lib.ts' + +const flags = parseArgs(Deno.args, { + boolean: ["validate", "json", "text"], + default: { + validate: true, + json: false, + text: false + } +}); + +const uic = new UICVehicle(flags._[0] ?? prompt('\x1b[0;1;32minput: \x1b[0mProvide a UIC wagon number:'), { + allowMissmatchedReportingMarkCountry: !flags.validate, + allowUnknownReportingMarkCountry: !flags.validate, + allowUICCountryUnknown: !flags.validate, + fetchOperatorFromReportingMark: true, + validateCheckDigit: flags.validate +}) +console.log(flags.json ? JSON.stringify(uic, null, 2) : flags.text ? 
// Tail of the preceding cli.ts hunk (the end of its final `console.log(...)` expression):
//     uic.toString() : uic);

// #region countries.ts — UIC country registry

/** A single decimal digit, as a string literal. */
export type Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';

/**
 * A country known to the UIC numbering scheme.
 *
 * Constructing an instance registers it in {@link UICCountry.countryMaps} under its name,
 * its ISO code, its UIC identification number and (when it has one) its treaty short code.
 * A later registration under the same key replaces the earlier one.
 */
export class UICCountry<IdentificationNumber extends `${Digit}${Digit}` = `${Digit}${Digit}`> {
    /** Lookup tables filled by the constructor. The container is frozen; the maps themselves are not. */
    public static readonly countryMaps = Object.freeze({
        byCountryName: new Map<string, UICCountry>(),
        byIso: new Map<string, UICCountry>(),
        by1958TreatyShortCode: new Map<string, UICCountry>(),
        byUICIdentificationNumber: new Map<`${Digit}${Digit}`, UICCountry>(),
    });

    /**
     * @param name Human readable country name.
     * @param iso ISO 3166-1 alpha-2 code (see https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).
     * @param treatyShort Variable-length short code from the 1958 treaty list, or `null` when the country has none.
     * @param uicIdentifier Two-digit UIC country identification number.
     */
    public constructor(
        public readonly name: string,
        public readonly iso: string,
        public readonly treatyShort: string | null,
        public readonly uicIdentifier: IdentificationNumber,
    ) {
        const maps = UICCountry.countryMaps;
        maps.byCountryName.set(name, this);
        maps.byIso.set(iso, this);
        if (treatyShort) maps.by1958TreatyShortCode.set(treatyShort, this);
        maps.byUICIdentificationNumber.set(uicIdentifier, this);
    }

    /** Field-by-field comparison of all four properties. */
    public equals(country2: UICCountry): boolean {
        return (
            this.iso === country2.iso &&
            this.name === country2.name &&
            this.treatyShort === country2.treatyShort &&
            this.uicIdentifier === country2.uicIdentifier
        );
    }

    /** Plain-object form used by `JSON.stringify`. */
    public toJSON() {
        return {
            name: this.name,
            iso: this.iso,
            treatyShort: this.treatyShort,
            uicIdentifier: this.uicIdentifier,
        };
    }
}

// #region Country Data
// Sources: https://treaties.fcdo.gov.uk/data/Library2/pdf/1958-TS0049.pdf for variable length country codes - manually retyped
//          https://en.wikipedia.org/wiki/List_of_UIC_country_codes for UIC country codes

/** Country name -> short code, as listed in the 1958 treaty document. Only names that also appear in the UIC table are ever used. */
const TREATY_SHORT_CODES: ReadonlyMap<string, string> = new Map<string, string>([
    ['Australia', 'AUS'],
    ['Austria', 'A'],
    ['Belgium', 'B'],
    ['Belgian Congo', 'CB'],
    ['Bulgaria', 'BG'],
    ['Chile', 'RCH'],
    ['Czechoslovakia', 'CS'],
    ['Denmark', 'DK'],
    ['France', 'F'],
    ['Saar', 'SA'],
    ['India', 'IND'],
    ['Iran', 'IR'],
    ['Israel', 'IL'],
    ['Italy', 'I'],
    ['Lebanon', 'RL'],
    ['Luxembourg', 'L'],
    ['Netherlands', 'NL'],
    ['Norway', 'N'],
    ['Philippines', 'PI'],
    ['Poland', 'PL'],
    ['Sweden', 'S'],
    ['Switzerland', 'CH'],
    ['Turkey', 'TR'],
    ['Union of South Africa', 'ZA'],
    ['United Kingdom', 'GB'],
    ['Alderney', 'GBA'],
    ['Guernsey', 'GBG'],
    ['Jersey', 'GBJ'],
    ['Aden', 'ADN'],
    ['Bahamas', 'BS'],
    ['Basutoland', 'BL'],
    ['Bechuanaland', 'BP'],
    ['British Honduras', 'BH'],
    ['Cyprus', 'CY'],
    ['Gambia', 'WAG'],
    ['Gibraltar', 'GBZ'],
    ['Gold Coast', 'WAC'],
    ['Hong Kong', 'HK'],
    ['Jamaica', 'JA'],
    ['Johore', 'JO'],
    ['Kedah', 'KD'],
    ['Kelantan', 'KL'],
    ['Kenya', 'EAK'],
    ['Labuan', 'SS'],
    ['Malacca', 'SS'],
    ['Malaya (Negri Sembilan, Pahang, Perak, Selangor)', 'FM'],
    ['Malta', 'GBY'],
    ['Mauritius', 'MS'],
    ['Nigeria', 'WAN'],
    ['Northern Rhodesia', 'NR'],
    ['Nyasaland', 'NP'],
    ['Penang', 'SS'],
    ['Province Wellesley', 'SS'],
    ['Seychelles', 'SY'],
    ['Sierra Leone', 'WAL'],
    ['Somaliland', 'SP'],
    ['Southern Rhodesia', 'SR'],
    ['Swaziland', 'SD'],
    ['Tanganyika', 'EAT'],
    ['Trengganu', 'TU'],
    ['Trinidad', 'TD'],
    ['Uganda', 'EAU'],
    ['Grenada', 'WG'],
    ['St. Lucia', 'WL'],
    ['St. Vincent', 'WV'],
    ['Zanzibar', 'EAZ'],
    ['United States of America', 'USA'],
    ['Yugoslavia', 'YU'],
]);

/*
 * The table below was produced by running this snippet in a browser console on the
 * Wikipedia page named above:
 *
 *   (()=>{
 *     const baseCountriesWikipedia = document.querySelector('table.wikitable.sortable.jquery-tablesorter');
 *     const countries = [];
 *     baseCountriesWikipedia.querySelectorAll('tbody > tr').forEach(v=>{
 *       const num = parseInt(v.querySelector('td').textContent.trim()).toString();
 *       const alphaCode = v.querySelector('td+td').textContent.trim();
 *       const sentientParsable = v.querySelector('td+td+td').textContent.trim();
 *       countries.push([num==='NaN'?null:num,alphaCode,sentientParsable]);
 *     });
 *     console.log(countries)
 *   })();
 */
// #region UIC Countries
/** `[UIC number | null, ISO alpha-2, name]`. Rows whose number is `null` are not registered. */
const UIC_COUNTRIES: ReadonlyArray<readonly [digit: `${Digit}${Digit}` | null, iso: string, name: string]> = [
    ['10', 'FI', 'Finland'],
    ['20', 'RU', 'Russia'],
    ['21', 'BY', 'Belarus'],
    ['22', 'UA', 'Ukraine'],
    ['23', 'MD', 'Moldova'],
    ['24', 'LT', 'Lithuania'],
    ['25', 'LV', 'Latvia'],
    ['26', 'EE', 'Estonia'],
    ['27', 'KZ', 'Kazakhstan'],
    ['28', 'GE', 'Georgia'],
    ['29', 'UZ', 'Uzbekistan'],
    ['30', 'KP', 'North Korea'],
    ['31', 'MN', 'Mongolia'],
    ['32', 'VN', 'Vietnam'],
    ['33', 'CN', 'China'],
    ['34', 'LA', 'Laos'],
    ['40', 'CU', 'Cuba'],
    ['41', 'AL', 'Albania'],
    ['42', 'JP', 'Japan'],
    ['44', 'BA', 'Bosnia and Herzegovina, Serb Republic of'],
    ['49', 'BA', 'Bosnia and Herzegovina'],
    ['50', 'BA', 'Bosnia and Herzegovina, Muslim-Croat Federation of'],
    ['51', 'PL', 'Poland'],
    ['52', 'BG', 'Bulgaria'],
    ['53', 'RO', 'Romania'],
    ['54', 'CZ', 'Czech Republic'],
    ['55', 'HU', 'Hungary'],
    ['56', 'SK', 'Slovakia'],
    ['57', 'AZ', 'Azerbaijan'],
    ['58', 'AM', 'Armenia'],
    ['59', 'KG', 'Kyrgyzstan'],
    ['60', 'IE', 'Ireland'],
    ['61', 'KR', 'South Korea'],
    ['62', 'ME', 'Montenegro'],
    ['65', 'MK', 'North Macedonia'],
    ['66', 'TJ', 'Tajikistan'],
    ['67', 'TM', 'Turkmenistan'],
    ['68', 'AF', 'Afghanistan'],
    ['70', 'GB', 'United Kingdom'],
    ['71', 'ES', 'Spain'],
    ['72', 'RS', 'Serbia'],
    ['73', 'GR', 'Greece'],
    ['74', 'SE', 'Sweden'],
    ['75', 'TR', 'Turkey'],
    ['76', 'NO', 'Norway'],
    ['78', 'HR', 'Croatia'],
    ['79', 'SI', 'Slovenia'],
    ['80', 'DE', 'Germany'],
    ['81', 'AT', 'Austria'],
    ['82', 'LU', 'Luxembourg'],
    ['83', 'IT', 'Italy'],
    ['84', 'NL', 'Netherlands'],
    ['85', 'CH', 'Switzerland'],
    ['86', 'DK', 'Denmark'],
    ['87', 'FR', 'France'],
    ['88', 'BE', 'Belgium'],
    ['89', 'TZ', 'Tanzania'],
    ['90', 'EG', 'Egypt'],
    ['91', 'TN', 'Tunisia'],
    ['92', 'DZ', 'Algeria'],
    ['93', 'MA', 'Morocco'],
    ['94', 'PT', 'Portugal'],
    ['95', 'IL', 'Israel'],
    ['96', 'IR', 'Iran'],
    ['97', 'SY', 'Syria'],
    ['98', 'LB', 'Lebanon'],
    ['99', 'IQ', 'Iraq'],
    [null, 'AU', 'Australia'],
    [null, 'CA', 'Canada'],
    [null, 'CD', 'DR Congo'],
    [null, 'ZA', 'South Africa'],
    [null, 'US', 'United States'],
];
// #endregion

for (const [digit, iso, name] of UIC_COUNTRIES) {
    // The constructor registers the instance in UICCountry.countryMaps; the reference itself is not needed.
    if (digit !== null) new UICCountry(name, iso, TREATY_SHORT_CODES.get(name) ?? null, digit);
}
// #endregion
// #endregion

// #region lib.ts — UIC vehicle number parser

export type UICParserOptions = {
    /**
     * Fetch the operator name from the {@link https://en.wikipedia.org/wiki/Reporting_mark#Europe_since_2006 reporting mark}.
     *
     * If `false`, the reporting mark is ignored entirely. If `true`, its operator part is stored on the vehicle.
     * Diacritical marks are kept by this library; {@link https://en.wikipedia.org/wiki/Reporting_mark#Europe_since_2006 they should be removed prior to parsing}.
     *
     * Has no effect when the input carries no recognisable reporting mark.
     *
     * @default true
     * @see allowUnknownReportingMarkCountry
     * @see allowMissmatchedReportingMarkCountry
     */
    fetchOperatorFromReportingMark?: boolean,
    /**
     * Allow the country field in the {@link https://en.wikipedia.org/wiki/Reporting_mark#Europe_since_2006 reporting mark} to mismatch the UIC number.
     *
     * If `true`, the reporting mark country is ignored. If `false`, it is validated against the UIC country
     * (two entries count as the same country when their ISO codes agree).
     *
     * If {@link fetchOperatorFromReportingMark} is `false`, this value is ignored.
     *
     * @default true
     * @see allowUnknownReportingMarkCountry
     * @see fetchOperatorFromReportingMark
     */
    allowMissmatchedReportingMarkCountry?: boolean,
    /**
     * Allow the country field in the {@link https://en.wikipedia.org/wiki/Reporting_mark#Europe_since_2006 reporting mark} to be an unknown value.
     *
     * If {@link fetchOperatorFromReportingMark} is `false`, this value is ignored.
     *
     * If {@link allowMissmatchedReportingMarkCountry} is `true`, this value is ignored.
     *
     * The check is applied whether or not the UIC country itself could be identified.
     *
     * If `true`, an unknown country is treated as if it was the correct country. If `false`, countries we can't identify will error.
     *
     * @default true
     * @see allowMissmatchedReportingMarkCountry
     * @see fetchOperatorFromReportingMark
     */
    allowUnknownReportingMarkCountry?: boolean,
    /**
     * Allow the country field in the UIC code to be an unknown value.
     *
     * If `true`, an unknown country is stored as `null` (the raw two-digit code is kept). If `false`, countries we can't identify will error.
     *
     * If `true` and the country is unknown, some operations become lossy: `toString()` has no
     * short code to build the reporting mark from, so the operator is omitted from the string.
     *
     * @default true
     */
    allowUICCountryUnknown?: boolean,
    /**
     * Validates the check digit.
     *
     * If `true`, an incorrect check digit throws. If `false`, an incorrect check digit is ignored and we override it.
     *
     * When no check digit is provided, this value is always treated as `false`.
     *
     * @default true
     */
    validateCheckDigit?: boolean,
    // TODO: Add option to, when we fail to identify the country from the UIC, attempt to find it from the reporting mark and override the country code using that.
};

/**
 * 1st group: 11 or 12 digits, each optionally preceded by junk characters and followed by `-`/`_`.
 * 2nd group (optional): a reporting mark, i.e. letters, separator, then 2-5 letters.
 * Note: the first capture group *may* include random characters; they are filtered out later.
 */
const UIC_WITH_OPTIONAL_MARK = /((?:[^0-9]*?[0-9][-_]*){11,12})[^a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]+([a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]+[^a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]+[a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]{2,5})?/;
/** Splits the cleaned digit string into its fields; the trailing check digit is optional. */
const UIC_NUMERIC_PARTS = /^(?<vehicleType>[0-9]{2})(?<countryCode>[0-9]{2})(?<vehicleFamily>[0-9]{4})(?<serialNumber>[0-9]{3})(?<selfCheckDigit>[0-9])?$/;
/** Splits a reporting mark into its country part and its operator part. */
const REPORTING_MARK_PARTS = /(?<countryCode>[a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]+)[^a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]+(?<operator>[a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]{2,5})/;
const NON_DIGITS = /[^0-9]/g;

type NumericMatch = RegExpMatchArray & {
    groups: {
        vehicleType: `${Digit}${Digit}`,
        countryCode: `${Digit}${Digit}`,
        vehicleFamily: `${Digit}${Digit}${Digit}${Digit}`,
        serialNumber: `${Digit}${Digit}${Digit}`,
        selfCheckDigit?: Digit,
    }
};
type ReportingMarkMatch = RegExpMatchArray & {
    groups: {
        countryCode: string,
        operator: string,
    }
};

/**
 * A parsed 12-digit UIC vehicle number, optionally with the operator taken from a reporting mark.
 *
 * @typeParam Operator Operator string type, or `null` when no operator is known.
 * @typeParam Country `UICCountry` when the country code was recognised, `null` otherwise.
 */
export default class UICVehicle<Operator extends string | null = string | null, Country extends UICCountry | null = UICCountry | null> {
    /**
     * Computes the self-check digit for an 11-digit UIC number: digits at even 0-based
     * positions are doubled (subtracting 9 when the result exceeds 9), everything is summed,
     * and the result is the distance from that sum to the next multiple of ten.
     *
     * @param uic Exactly 11 digits, *without* the check digit.
     * @returns The check digit, 0-9.
     * @throws Error if `uic` contains a non-digit, has 12 characters, or is not 11 characters long.
     */
    public static getSelfCheckDigit(uic: string): number {
        if (/[^0-9]/.test(uic)) throw new Error('Expected a UIC with only digits.');
        if (uic.length === 12)
            throw new Error('This function assumes you pass a UIC *without* a self-check digit.');
        if (uic.length !== 11)
            throw new Error('UIC does not have length of 11.');

        let sum = 0;
        for (let index = 0; index < uic.length; index++) {
            const digit = uic.charCodeAt(index) - 48; // '0' is char code 48
            const weighted = index % 2 === 0 ? digit * 2 : digit;
            sum += weighted > 9 ? weighted - 9 : weighted;
        }
        return (10 - (sum % 10)) % 10;
    }

    protected readonly country: Country;
    /** `true` when the UIC country code was recognised. */
    public hasCountry(): Country extends null ? false : true;
    public hasCountry(): boolean {
        return this.country !== null;
    }
    /**
     * @returns The recognised country.
     * @throws Error when the country code was not recognised.
     */
    public getCountry(): NonNullable<Country> {
        if (this.country === null) throw new Error('UIC has no country!');
        return this.country as NonNullable<Country>;
    }

    protected operator: Operator = null as Operator;
    /** `true` when an operator is set. */
    public hasOperator(): Operator extends null ? false : true;
    public hasOperator(): boolean {
        return this.operator !== null;
    }
    /**
     * @returns The operator string.
     * @throws Error when no operator is set.
     */
    public getOperator(): NonNullable<Operator> {
        if (this.operator === null) throw new Error('UIC has no operator!');
        return this.operator as NonNullable<Operator>;
    }

    protected vehicleType: `${Digit}${Digit}`;
    /** Only set when the country could not be identified; otherwise the property is absent. */
    protected countryCode?: `${Digit}${Digit}`;
    protected vehicleFamily: `${Digit}${Digit}${Digit}${Digit}`;
    protected serialNumber: `${Digit}${Digit}${Digit}`;

    /** Two-digit country number, taken from the country when known and from the raw code otherwise. */
    protected get resolvedCountryCode(): `${Digit}${Digit}` {
        const code = this.country?.uicIdentifier ?? this.countryCode;
        if (code === undefined) throw new Error('UIC has no country code!');
        return code;
    }

    /** Check digit recomputed from the stored fields. */
    public get selfCheckDigit(): Digit {
        return UICVehicle.getSelfCheckDigit(
            this.vehicleType + this.resolvedCountryCode + this.vehicleFamily + this.serialNumber,
        ).toString() as Digit;
    }

    /**
     * Formats as `TT CC FFFF SSS D`, followed by ` <country>-<operator>` when both an operator
     * and a treaty short code for the country are available.
     */
    public toString(): string {
        const cc = this.resolvedCountryCode;
        const markCountry = this.country?.treatyShort
            ?? (this.countryCode !== undefined
                ? UICCountry.countryMaps.byUICIdentificationNumber.get(this.countryCode)?.treatyShort
                : undefined)
            ?? null;
        // Separation of first character of vehicleFamily is typically done in Switzerland for SBB and BLS (although not Thurbo?) - and not in germany, unsure about elsewhere
        const family = cc === '85'
            ? `${this.vehicleFamily.charAt(0)} ${this.vehicleFamily.substring(1)}`
            : this.vehicleFamily;
        const mark = markCountry && this.operator ? ` ${markCountry}-${this.operator}` : '';
        return `${this.vehicleType} ${cc} ${family} ${this.serialNumber} ${this.selfCheckDigit}${mark}`;
    }

    /** Plain-object form used by `JSON.stringify`; accepted back by {@link UICVehicle.fromJSON}. */
    public toJSON() {
        return {
            operator: this.operator,
            vehicleType: this.vehicleType,
            vehicleFamily: this.vehicleFamily,
            serialNumber: this.serialNumber,
            countryCode: this.country?.uicIdentifier ?? this.countryCode,
            checkDigit: this.selfCheckDigit,
            country: this.country,
        };
    }

    /**
     * Parses a UIC vehicle number, optionally followed by a reporting mark.
     *
     * @param uic Text containing 11 or 12 digits (non-digit characters between them are discarded).
     * @param options See {@link UICParserOptions}; every option defaults to `true`.
     * @throws Error when the input is empty, not a string, does not contain a usable number,
     *   has a wrong check digit (with `validateCheckDigit`), or fails one of the country checks
     *   enabled through `options`.
     */
    public constructor(uic: string, options?: UICParserOptions) {
        if (!uic)
            throw new Error('UIC is empty or null');
        if (typeof uic !== 'string')
            throw new Error('UIC is not a string');

        // Parse Options
        const willQueryReportingMark = options?.fetchOperatorFromReportingMark ?? true;
        const willValidateCountryCode = willQueryReportingMark && !(options?.allowMissmatchedReportingMarkCountry ?? true);
        const countryCodeValidationPermitsUnknowns = willValidateCountryCode ? (options?.allowUnknownReportingMarkCountry ?? true) : true;
        const allowUICCountryUnknown = options?.allowUICCountryUnknown ?? true;
        const validateCheckDigit = options?.validateCheckDigit ?? true;

        // The regex needs at least one non-letter after the digits, so a space is appended;
        // otherwise a trailing check digit (as in `94 85 1 511 052-6`) would be consumed as that separator.
        const firstParse = `${uic} `.match(UIC_WITH_OPTIONAL_MARK);
        if (!firstParse) throw new Error('Invalid UIC code.');

        // Now, extract the numeric part
        const numeric = (firstParse[1] ?? '').replace(NON_DIGITS, '').match(UIC_NUMERIC_PARTS) as NumericMatch | null;
        if (!numeric) throw new Error('Failed to parse UIC code. Invalid length? Parser bug? Who knows at this point! Anyways, you fucked up bad to get here.');
        const { vehicleType, countryCode, vehicleFamily, serialNumber, selfCheckDigit: givenCheckDigit } = numeric.groups;

        // Calculate an expected check digit
        const expectedCheckDigit = UICVehicle.getSelfCheckDigit(`${vehicleType}${countryCode}${vehicleFamily}${serialNumber}`);
        if (validateCheckDigit && givenCheckDigit !== undefined && parseInt(givenCheckDigit, 10) !== expectedCheckDigit)
            throw new Error(`Incorrect Self-Check Digit! Expected ${expectedCheckDigit}, got ${givenCheckDigit}`);

        // Aaand the reporting mark
        const reportingMark = (willQueryReportingMark
            ? (firstParse[2]?.match(REPORTING_MARK_PARTS) ?? null)
            : null) as ReportingMarkMatch | null;
        if (reportingMark)
            this.operator = reportingMark.groups.operator as Operator;

        // Query the country we have
        this.country = (UICCountry.countryMaps.byUICIdentificationNumber.get(countryCode) ?? null) as Country;
        if (!this.country && !allowUICCountryUnknown)
            throw new Error('allowUICCountryUnknown===false yet invalid country. Code ' + countryCode + ' didn\'t get us anywhere. Time for an update?');

        // Validate the reporting mark if options tell us to
        if (willValidateCountryCode && reportingMark) {
            const code = reportingMark.groups.countryCode;
            const markCountry = UICCountry.countryMaps.by1958TreatyShortCode.get(code) ?? UICCountry.countryMaps.byIso.get(code);
            if (!markCountry) {
                if (!countryCodeValidationPermitsUnknowns) throw new Error(`Country Code ${JSON.stringify(code)} not found in Country Code Store. Please validate.`);
            } else if (this.country && markCountry.iso !== this.country.iso) {
                // Compared by ISO code: several UIC numbers can share one ISO code (e.g. "BA"),
                // and the ISO lookup table only keeps one entry per code.
                throw new Error(`Reporting Mark Country Code ${JSON.stringify(code)} (${markCountry.name}) is not ${this.country.name}.`);
            }
        }

        this.vehicleType = vehicleType;
        this.vehicleFamily = vehicleFamily;
        this.serialNumber = serialNumber;
        if (!this.country)
            this.countryCode = countryCode;
        else
            // Hide from enumeration (the field may exist as `undefined` depending on how class fields are emitted)
            delete this.countryCode;

        // Finally, verify we didn't go insane
        if (this.selfCheckDigit !== expectedCheckDigit.toString())
            throw new Error('Self Check Digit Mismatch!');
    }

    /**
     * Rebuilds a vehicle from the shape produced by {@link UICVehicle.toJSON} by reconstructing
     * the textual number and parsing it again.
     *
     * The `operator` from `json` is authoritative: it is restored on the result whenever the
     * re-parse yields something different (no usable country short code, or an operator the
     * reporting-mark pattern cannot represent).
     *
     * @param json At least one of `countryCode` / `country` must be present; `country` may be `null`
     *   when `countryCode` is given.
     * @param _internalParserOptions Options forwarded to the constructor.
     * @throws Error when neither `countryCode` nor `country` is present, plus anything the constructor throws.
     */
    public static fromJSON<Operator extends string | null>(json: {
        operator: Operator;
        vehicleType: `${Digit}${Digit}`;
        vehicleFamily: `${Digit}${Digit}${Digit}${Digit}` | `${Digit} ${Digit}${Digit}${Digit}`;
        serialNumber: `${Digit}${Digit}${Digit}`;
        checkDigit?: Digit;
    } & ({
        countryCode: `${Digit}${Digit}`;
        country?: Pick<UICCountry, 'treatyShort' | 'uicIdentifier'> | null;
    } | {
        countryCode?: `${Digit}${Digit}`;
        country: Pick<UICCountry, 'treatyShort' | 'uicIdentifier'>;
    }), _internalParserOptions?: UICParserOptions): UICVehicle<Operator> {
        const countryNumber = json.countryCode ?? json.country?.uicIdentifier;
        if (countryNumber === undefined)
            throw new Error('UIC JSON has neither countryCode nor country.');

        // A country object in the JSON wins; otherwise fall back to the registry.
        const markSource = json.country ?? UICCountry.countryMaps.byUICIdentificationNumber.get(countryNumber);
        const cc = json.operator ? (markSource?.treatyShort ?? null) : null;

        const reconstructedCode = `${json.vehicleType} ${countryNumber} ${json.vehicleFamily} ${json.serialNumber}${json.checkDigit ? '-' + json.checkDigit : ''}${cc ? ` ${cc}-${json.operator}` : ''}`;
        const reconstructedVehicle = new UICVehicle<Operator>(reconstructedCode, {
            ..._internalParserOptions,
        });
        if (reconstructedVehicle.operator !== json.operator)
            reconstructedVehicle.operator = (json.operator ?? null) as Operator;
        return reconstructedVehicle;
    }
}
// #endregion