Phone number validation in SAS

In SAS it’s trivial to perform basic validation of a US phone number using a regular expression, but this SAS macro goes well beyond the basic validation rules: it checks several additional rules, including in-service area codes, directory assistance numbers, and other special phone numbers. Simple unit testing ensures the macro works as intended.



%macro validate_phone(
		sm_phone_number= /* Ten-digit NANP phone number. */, 
		sm_exception= /* Variable in which to record exception.  Blank means no exception */
		);
/* Validate a North American Numbering Plan (NANP) phone number.
 *
 * Call this macro inside a DATA step.  It generates DATA step statements that
 * inspect the character value named by sm_phone_number and store a one-letter
 * code in the variable named by sm_exception:
 *
 *   blank = no exception
 *   A     = area code not in the in-service list
 *   R     = one digit repeated ten times
 *   I     = known number that does not accept inbound calls
 *   D     = directory assistance (NXX-555-1212)
 *   F     = reserved for fictional use (555-0100 through 555-0199)
 *   1     = N11 exchange on an area code that is not toll-free or 900
 *   S     = fails the basic NANP syntax check
 *
 * When several rules fire, the rule checked last wins.
 * Leading and trailing blanks in the phone value are ignored by every check.
 */
%local mv_npa mv_phone; /* keep helper macro variables out of the calling scope */

/* in-service area codes from Microsoft Access database from NANPA <http://www.nanpa.com/area_codes/index.html> */
/* This list is a dated snapshot.  855 is included because the N11 rule below treats it as a toll-free area code. */
%let mv_npa = "201","202","203","204","205","206","207","208",
			  "209","210","212","213","214","215","216","217",
			  "218","219","224","225","226","228","229","231",
			  "234","239","240","242","246","248","250","251",
			  "252","253","254","256","260","262","264","267",
			  "268","269","270","276","281","284","289","301",
			  "302","303","304","305","306","307","308","309",
			  "310","312","313","314","315","316","317","318",
			  "319","320","321","323","325","330","331","334",
			  "336","337","339","340","345","347","351","352",
			  "360","361","385","386","401","402","403","404",
			  "405","406","407","408","409","410","412","413",
			  "414","415","416","417","418","419","423","424",
			  "425","430","432","434","435","438","440","441",
			  "442","443","450","456","458","469","470","473",
			  "475","478","479","480","484","500","501","502",
			  "503","504","505","506","507","508","509","510",
			  "512","513","514","515","516","517","518","519",
			  "520","530","533","540","541","551","559","561",
			  "562","563","567","570","571","573","574","575",
			  "580","581","585","586","587","600","601","602",
			  "603","604","605","606","607","608","609","610",
			  "612","613","614","615","616","617","618","619",
			  "620","623","626","630","631","636","641","646",
			  "647","649","650","651","657","660","661","662",
			  "664","670","671","678","681","682","684","700",
			  "701","702","703","704","705","706","707","708",
			  "709","710","712","713","714","715","716","717",
			  "718","719","720","724","727","731","732","734",
			  "740","747","754","757","758","760","762","763",
			  "765","767","769","770","772","773","774","775",
			  "778","779","780","781","784","785","786","787",
			  "800","801","802","803","804","805","806","807",
			  "808","809","810","812","813","814","815","816",
			  "817","818","819","828","829","830","831","832",
			  "843","845","847","848","849","850","855","856","857",
			  "858","859","860","862","863","864","865","866",
			  "867","868","869","870","872","876","877","878",
			  "888","900","901","902","903","904","905","906",
			  "907","908","909","910","912","913","914","915",
			  "916","917","918","919","920","925","928","931",
			  "936","937","939","940","941","947","949","951",
			  "952","954","956","970","971","972","973","978",
			  "979","980","985","989";

/* Every pattern check looks at the same blank-stripped value. */
%let mv_phone = strip(&sm_phone_number);

	/* Start from "no exception" so that a code already stored in the variable
	 * (for example from an earlier pass over the same data set) cannot survive
	 * for a number that passes every check. */
	&sm_exception = ' ';

	/* The order of the errors is somewhat significant: syntax errors are more serious, 
	 * so they are last to override other errors. */

	/* A=area code not in service */
	/* LEFT (not STRIP) keeps the value length so SUBSTR stays valid for a blank value. */
	if substr(left(&sm_phone_number), 1, 3) not in (&mv_npa) then &sm_exception = 'A'; 

	/* R=repeating number like 5555555555 is a probable fake */
	if prxmatch('/^([0-9])(\1{9})$/', &mv_phone) eq 1 then &sm_exception = 'R'; 

	/* I=Skype and Google GMail phone numbers do not allow inbound calls */
	if &mv_phone in ('2025808200', '7607058888') then &sm_exception = 'I'; 

	/* D=directory assistance <https://en.wikipedia.org/wiki/555-1212> */
	if prxmatch('/^[0-9]{3}5551212$/', &mv_phone) eq 1 then &sm_exception = 'D'; 

	/* F=numbers specifically reserved for fictional use are "555-0100" through "555-0199" */
	if prxmatch('/^[2-9][0-8][0-9]55501[0-9]{2}$/', &mv_phone) eq 1 then &sm_exception = 'F'; /* fake */

	/* 1=the last two digits of NXX cannot both be 1, to avoid confusion with the N11 
	 * codes (http://en.wikipedia.org/wiki/North_American_Numbering_Plan) 
	 * Only non-geographic area codes, such as toll-free 800/888/877/866/855 numbers 
	 * and 900 numbers may use N11 as the telephone exchange prefix, since 
	 * the area code must always be dialed for these numbers. 
	 * <https://en.wikipedia.org/wiki/N11_code> */
	if (prxmatch('/^(800|888|877|866|855|900)/', &mv_phone) ne 1) and
		(prxmatch('/^[2-9][0-8][0-9][2-9]11[0-9]{4}\b/', &mv_phone) eq 1) then &sm_exception = '1';

	/* S=basic NANP syntax */
	/* This pattern tolerates parentheses, one space and one hyphen; the checks above expect bare digits. */
	if prxmatch('/^\(?[2-9][0-8][0-9]\)? ?[2-9][0-9]{2}-?[0-9]{4}\b/', &mv_phone) ne 1 then &sm_exception = 'S'; 
%mend;


/* Data set for unit testing */

data ph;
	/* One test case per data line: column 1 holds the exception code the macro
	 * is expected to produce (blank = none), columns 3-14 hold the phone value. */
	length expected_exception $1 phone $12;
	input expected_exception $ 1 phone $ 3-14;
datalines;
  2024561111
  2024561414
  2024566213
  2024562121
  8885116548
  8007775777
  8009110000
  8779110000
  8889110000
1 2022110000
1 2023110000
1 2024110000
1 2025110000
1 2026110000
1 2027110000
1 2028110000
1 2029110000
S 0000000000
S 
S 2
S 20
S 202
S 2024
S 20245
S 202456
S 2024561
S 20245614
S 202456141
S 20245614141
S 1024561111
S 2021561111
A 5894561111
R 4444444444
R 5555555555
R 6666666666
R 7777777777
R 8888888888
F 7195550100
F 7195550199
I 2025808200
I 7607058888
D 2025551212
;


/* Unit test: apply the macro to every row of the test data. */
/* When the recorded exception differs from the expected one, write an ERROR line with all variables to the log. */
data ph;
	set ph;
	%validate_phone(sm_exception=exception, sm_phone_number=phone)
	if not (exception eq expected_exception) then do;
		put 'ERROR: ' _ALL_;
	end;
run;

Leave a comment