SQL: Fix FORMAT function to better comply with Microsoft SQL Server specification (#86225)

This commit is contained in:
Luigi Dell'Aquila 2022-05-18 12:03:00 +02:00 committed by GitHub
parent fd99a502ee
commit f69c7396d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 303 additions and 41 deletions

View file

@ -225,3 +225,6 @@ indent_size = 2
[*.{xsd,xml}]
indent_size = 4
[*.{csv,sql}-spec]
trim_trailing_whitespace = false

View file

@ -0,0 +1,6 @@
pr: 86225
summary: Fix FORMAT function to comply with Microsoft SQL Server specification
area: SQL
type: bug
issues:
- 66560

View file

@ -810,7 +810,7 @@ SQL Server Format Specification].
[NOTE]
If the 1st argument is of type `time`, then pattern specified by the 2nd argument cannot contain date related units
(e.g. 'dd', 'MM', 'YYYY', etc.). If it contains such units an error is returned. +
(e.g. 'dd', 'MM', 'yyyy', etc.). If it contains such units an error is returned. +
Format specifier `F` works similarly to format specifier `f`.
It returns the fractional part of seconds, and the number of digits is the same as the number of `F`s provided as input (up to 9 digits).
The result is padded with trailing `0`s to match the number of `F`s provided.

View file

@ -404,3 +404,46 @@ SELECT emp_no FROM test_emp WHERE DATE_ADD('day', 1, hire_date) = '2021-02-03||-
10044
10085
;
// format
formatNormalPattern
SELECT FORMAT(birth_date, 'dd/MM/yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
x
----------
02/09/1953
;
formatWithDoubleQuoteEscaping
SELECT FORMAT(birth_date, '"yyyy" yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
x
------
yyyy 1953
;
formatSingleQuote
SELECT FORMAT(birth_date, '"''" yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
x
------
' 1953
;
formatQuotesAndAllowedCharacters
SELECT FORMAT(birth_date, 'abc ''yyy'' yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
x
------
abc yyy 1953
;
formatQuotesComplexString
SELECT FORMAT(birth_date, '\t\hi\s i\s \t\h\e \y\ear yyyy an\d \t\h\e \mon\t\h MM') as x FROM test_emp ORDER BY emp_no LIMIT 1;
x
------------------------------------
this is the year 1953 and the month 09
;

View file

@ -1173,8 +1173,8 @@ M | 1996-11-05 00:00:00.000Z
selectFormat
schema::format_date:s|format_datetime:s|format_time:s
SELECT FORMAT('2020-04-05T11:22:33.123Z'::date, 'dd/MM/YYYY HH:mm:ss.fff') AS format_date,
FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff') AS format_datetime,
SELECT FORMAT('2020-04-05T11:22:33.123Z'::date, 'dd/MM/yyyy HH:mm:ss.fff') AS format_date,
FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff') AS format_datetime,
FORMAT('11:22:33.123456789Z'::time, 'HH:mm:ss.ff') AS format_time;
format_date | format_datetime | format_time
@ -1184,8 +1184,8 @@ FORMAT('11:22:33.123456789Z'::time, 'HH:mm:ss.ff') AS format_time;
selectFormatWithLength
schema::format_datetime:s|length:i
SELECT FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff') AS format_datetime,
LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff')) AS length;
SELECT FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff') AS format_datetime,
LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff')) AS length;
format_datetime | length
------------------------+----------------
@ -1194,7 +1194,7 @@ LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff')) A
selectFormatWithField
schema::birth_date:ts|format_birth_date1:s|format_birth_date2:s|emp_no:i
SELECT birth_date, FORMAT(birth_date, 'MM/dd/YYYY') AS format_birth_date1, FORMAT(birth_date, concat(gender, 'M/dd')) AS format_birth_date2, emp_no
SELECT birth_date, FORMAT(birth_date, 'MM/dd/yyyy') AS format_birth_date1, FORMAT(birth_date, concat(gender, 'M/dd')) AS format_birth_date2, emp_no
FROM test_emp WHERE gender = 'M' AND emp_no BETWEEN 10037 AND 10052 ORDER BY emp_no;
birth_date | format_birth_date1 | format_birth_date2 | emp_no
@ -1233,7 +1233,7 @@ WHERE FORMAT(birth_date, 'MM')::integer > 10 ORDER BY emp_no LIMIT 10;
formatOrderBy
schema::birth_date:ts|format_birth_date:s
SELECT birth_date, FORMAT(birth_date, 'MM/dd/YYYY') AS format_birth_date FROM test_emp ORDER BY 2 DESC NULLS LAST LIMIT 10;
SELECT birth_date, FORMAT(birth_date, 'MM/dd/yyyy') AS format_birth_date FROM test_emp ORDER BY 2 DESC NULLS LAST LIMIT 10;
birth_date | format_birth_date
-------------------------+---------------

View file

@ -3190,7 +3190,7 @@ SELECT DATE_TRUNC('days', INTERVAL '19 15:24:19' DAY TO SECONDS) AS day;
formatDate
// tag::formatDate
SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/YYYY') AS "date";
SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/yyyy') AS "date";
date
------------------
@ -3200,7 +3200,7 @@ SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/YYYY') AS "date";
formatDateTime
// tag::formatDateTime
SELECT FORMAT(CAST('2020-04-05T11:22:33.987654' AS DATETIME), 'dd/MM/YYYY HH:mm:ss.ff') AS "datetime";
SELECT FORMAT(CAST('2020-04-05T11:22:33.987654' AS DATETIME), 'dd/MM/yyyy HH:mm:ss.ff') AS "datetime";
datetime
------------------

View file

@ -20,6 +20,7 @@ import java.time.format.DateTimeFormatter;
import java.time.temporal.TemporalAccessor;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import static org.elasticsearch.xpack.sql.util.DateUtils.asTimeAtZone;
@ -27,7 +28,42 @@ import static org.elasticsearch.xpack.sql.util.DateUtils.asTimeAtZone;
public class DateTimeFormatProcessor extends BinaryDateTimeProcessor {
public static final String NAME = "dtformat";
private static final String[][] JAVA_TIME_FORMAT_REPLACEMENTS = {
/**
* these characters have a meaning in MS date patterns.
* If a character is not in this set, then it's still allowed in MS FORMAT patterns
* but not in Java, so it has to be translated or quoted
*/
private static final Set<Character> MS_DATETIME_PATTERN_CHARS = Set.of(
'd',
'f',
'F',
'g',
'h',
'H',
'K',
'm',
'M',
's',
't',
'y',
'z',
':',
'/',
' ',
'-'
);
/**
* characters that start a quoting block in MS patterns
*/
private static final Set<Character> MS_QUOTING_CHARS = Set.of('\\', '\'', '"');
/**
* list of MS datetime patterns with the corresponding translation in Java DateTimeFormat
* (patterns that are the same in Java and in MS are not listed here)
*/
private static final String[][] MS_TO_JAVA_PATTERNS = {
{ "tt", "a" },
{ "t", "a" },
{ "dddd", "eeee" },
@ -47,10 +83,7 @@ public class DateTimeFormatProcessor extends BinaryDateTimeProcessor {
if (pattern.isEmpty()) {
return null;
}
for (String[] replacement : JAVA_TIME_FORMAT_REPLACEMENTS) {
pattern = pattern.replace(replacement[0], replacement[1]);
}
final String javaPattern = pattern;
final String javaPattern = msToJavaPattern(pattern);
return DateTimeFormatter.ofPattern(javaPattern, Locale.ROOT)::format;
}
},
@ -67,6 +100,95 @@ public class DateTimeFormatProcessor extends BinaryDateTimeProcessor {
}
};
/**
 * Converts a Microsoft SQL Server FORMAT date/time pattern into a pattern
 * that can be handed to Java's {@link DateTimeFormatter}.
 * <p>
 * The input is scanned left to right:
 * <ul>
 * <li>a backslash quotes the single character that follows it; a single or double quote
 * opens a block that runs until the same quote character is seen again;</li>
 * <li>outside of quoting, fragments listed in {@code MS_TO_JAVA_PATTERNS} are replaced
 * by their Java counterpart;</li>
 * <li>any other character that is not in {@code MS_DATETIME_PATTERN_CHARS} is treated
 * as literal text;</li>
 * <li>remaining characters are copied unchanged.</li>
 * </ul>
 * Consecutive literal characters are collected and written as one Java-quoted block.
 *
 * @param pattern the MS style pattern
 * @return the translated Java pattern
 */
protected static String msToJavaPattern(String pattern) {
    final int length = pattern.length();
    StringBuilder javaPattern = new StringBuilder(length);
    // literal text collected so far that still has to be quoted and written to javaPattern
    StringBuilder pendingLiteral = new StringBuilder();
    // true while the scan is inside an MS quoting construct (after '\', or between ' ' / " ")
    boolean insideMsQuote = false;
    // true when a literal block has been started and must be flushed before the next real pattern
    boolean literalPending = false;
    char openingQuote = '\\';

    for (int index = 0; index < length; index++) {
        final char current = pattern.charAt(index);

        if (insideMsQuote) {
            if (openingQuote == '\\') {
                // a backslash quotes exactly one character: take it and leave quoting mode
                insideMsQuote = false;
                literalPending = true;
                pendingLiteral.append(current);
            } else if (current == openingQuote) {
                // end of the quoted block; keep the literal pending,
                // the next character may open another quoted block
                insideMsQuote = false;
            } else {
                // still inside the quoted block
                pendingLiteral.append(current);
            }
            continue;
        }

        if (MS_QUOTING_CHARS.contains(current)) {
            // start of a quoted block (or of a single backslash-quoted character)
            insideMsQuote = true;
            literalPending = true;
            openingQuote = current;
            continue;
        }

        // look for an MS fragment starting here that needs translating; first match wins
        String[] translation = null;
        for (String[] candidate : MS_TO_JAVA_PATTERNS) {
            if (pattern.startsWith(candidate[0], index)) {
                translation = candidate;
                break;
            }
        }

        if (translation == null && MS_DATETIME_PATTERN_CHARS.contains(current) == false) {
            // allowed as plain text in MS but not in Java: it has to end up quoted
            literalPending = true;
            pendingLiteral.append(current);
            continue;
        }

        // from here on a real (unquoted) pattern is written, so emit the pending literal first
        if (literalPending) {
            literalPending = false;
            quoteAndAppend(javaPattern, pendingLiteral);
            pendingLiteral.setLength(0);
        }
        if (translation != null) {
            javaPattern.append(translation[1]);
            // skip the rest of the translated fragment, it may span several characters
            index += translation[0].length() - 1;
        } else {
            // same meaning in MS and in Java
            javaPattern.append(current);
        }
    }

    // the pattern ended while a literal block was still pending
    if (literalPending) {
        quoteAndAppend(javaPattern, pendingLiteral);
    }
    return javaPattern.toString();
}
/**
 * Appends {@code fragmentToQuote} to {@code mainBuffer} as a quoted literal in Java
 * {@link DateTimeFormatter} syntax: the text is wrapped in single quotes and every
 * single quote inside it is doubled.
 * <p>
 * An empty fragment appends nothing. Writing an empty pair of quotes would not produce
 * an empty literal: Java reads two adjacent single quotes as one escaped quote character,
 * so the formatted output would contain a stray {@code '}.
 *
 * @param mainBuffer      the Java pattern being built; modified in place
 * @param fragmentToQuote the literal text to quote; not modified
 */
private static void quoteAndAppend(StringBuilder mainBuffer, StringBuilder fragmentToQuote) {
    if (fragmentToQuote.length() == 0) {
        // e.g. an empty MS quoted block ("" or '') contributes no text
        return;
    }
    mainBuffer.append('\'');
    // plain (non-regex) replacement: only the quote character needs escaping
    mainBuffer.append(fragmentToQuote.toString().replace("'", "''"));
    mainBuffer.append('\'');
}
protected abstract Function<TemporalAccessor, String> formatterFor(String pattern);
public Object format(Object timestamp, Object pattern, ZoneId zoneId) {

View file

@ -118,17 +118,6 @@ public class DateTimeFormatProcessorTests extends AbstractSqlWireSerializingTest
);
assertEquals("A string is required; received [5]", siae.getMessage());
siae = expectThrows(
SqlIllegalArgumentException.class,
() -> new Format(Source.EMPTY, l(dateTime(2019, 9, 3, 18, 10, 37, 0)), l("invalid"), randomZone()).makePipe()
.asProcessor()
.process(null)
);
assertEquals(
"Invalid pattern [invalid] is received for formatting date/time [2019-09-03T18:10:37Z]; Unknown pattern letter: i",
siae.getMessage()
);
siae = expectThrows(
SqlIllegalArgumentException.class,
() -> new Format(Source.EMPTY, l(time(18, 10, 37, 123000000)), l("MM/dd"), randomZone()).makePipe().asProcessor().process(null)
@ -188,16 +177,17 @@ public class DateTimeFormatProcessorTests extends AbstractSqlWireSerializingTest
zoneId = ZoneId.of("Etc/GMT-10");
dateTime = l(dateTime(2019, 9, 3, 18, 10, 37, 123456789));
assertEquals("AD : 3", new Format(Source.EMPTY, dateTime, l("G : Q"), zoneId).makePipe().asProcessor().process(null));
assertEquals("G : Q", new Format(Source.EMPTY, dateTime, l("G : Q"), zoneId).makePipe().asProcessor().process(null));
assertEquals("AD", new Format(Source.EMPTY, dateTime, l("g"), zoneId).makePipe().asProcessor().process(null));
assertEquals("2019-09-04", new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd"), zoneId).makePipe().asProcessor().process(null));
assertEquals("2019-09-04", new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd"), zoneId).makePipe().asProcessor().process(null));
assertEquals("YYYY-09-04", new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd"), zoneId).makePipe().asProcessor().process(null));
assertEquals(
"2019-09-04 Wed",
new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd ddd"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd ddd"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals(
"2019-09-04 Wednesday",
new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd dddd"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd dddd"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals(
"04:10:37.123456",
@ -205,28 +195,27 @@ public class DateTimeFormatProcessorTests extends AbstractSqlWireSerializingTest
);
assertEquals(
"2019-09-04 04:10:37.12345678",
new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd HH:mm:ss.ffffffff"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd HH:mm:ss.ffffffff"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals(
"2019-09-04 04:10:37.12345678 AM",
new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd HH:mm:ss.ffffffff tt"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd HH:mm:ss.ffffffff tt"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals(
"2019-09-04 04:10:37.12345678 AM",
new Format(Source.EMPTY, dateTime, l("YYYY-MM-dd HH:mm:ss.ffffffff t"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("yyyy-MM-dd HH:mm:ss.ffffffff t"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals("+1000", new Format(Source.EMPTY, dateTime, l("Z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("Z", new Format(Source.EMPTY, dateTime, l("Z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("+10", new Format(Source.EMPTY, dateTime, l("z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("Etc/GMT-10", new Format(Source.EMPTY, dateTime, l("VV"), zoneId).makePipe().asProcessor().process(null));
assertEquals("Etc/GMT-10", new Format(Source.EMPTY, dateTime, l("K"), zoneId).makePipe().asProcessor().process(null));
assertEquals("1", new Format(Source.EMPTY, dateTime, l("F"), zoneId).makePipe().asProcessor().process(null));
assertEquals("12", new Format(Source.EMPTY, dateTime, l("FF"), zoneId).makePipe().asProcessor().process(null));
zoneId = ZoneId.of("America/Sao_Paulo");
assertEquals("-0300", new Format(Source.EMPTY, dateTime, l("Z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("Z", new Format(Source.EMPTY, dateTime, l("Z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("-03", new Format(Source.EMPTY, dateTime, l("z"), zoneId).makePipe().asProcessor().process(null));
assertEquals("America/Sao_Paulo", new Format(Source.EMPTY, dateTime, l("VV"), zoneId).makePipe().asProcessor().process(null));
assertEquals("VV", new Format(Source.EMPTY, dateTime, l("VV"), zoneId).makePipe().asProcessor().process(null));
assertEquals(
"07:11:22.1234",
@ -266,7 +255,7 @@ public class DateTimeFormatProcessorTests extends AbstractSqlWireSerializingTest
assertEquals(
"%9-\"09-\\Sep-September",
new Format(Source.EMPTY, dateTime, l("%M-\"MM-\\MMM-MMMM"), zoneId).makePipe().asProcessor().process(null)
new Format(Source.EMPTY, dateTime, l("%M-\\\"MM-\\\\MMM-MMMM"), zoneId).makePipe().asProcessor().process(null)
);
assertEquals(
@ -276,4 +265,103 @@ public class DateTimeFormatProcessorTests extends AbstractSqlWireSerializingTest
.process(null)
);
}
/**
 * Checks the three MS quoting styles (backslash, single quotes, double quotes),
 * alone and combined, including quote characters nested inside the other quote type.
 */
public void testQuoting() {
    ZoneId zoneId = ZoneId.of("Etc/GMT-10");
    Literal dateTime = l(dateTime(2019, 9, 3, 18, 10, 37, 123456789));

    // each row is { expected output, MS pattern }
    String[][] cases = {
        // the same sentence written with each of the three quoting styles
        {
            "this is the year 2019 and the month 09",
            "\\t\\hi\\s i\\s \\t\\h\\e \\y\\ear yyyy an\\d \\t\\h\\e \\mon\\t\\h MM" },
        { "this is the year 2019 and the month 09", "'this is the year' yyyy 'and the month' MM" },
        { "this is the year 2019 and the month 09", "\"this is the year\" yyyy \"and the month\" MM" },
        // all three styles back to back
        { "yxyzdm 09", "\\y'xyz'\"dm\" MM" },
        // a quote character of one kind inside a block delimited by the other kind
        { "this \" is a double quote", "'this \" is a double quote'" },
        { "this ' is a single quote", "\"this ' is a single quote\"" },
        { "' also this is a single quote", "\"' also this is a single quote\"" },
        { "and this too '", "\"and this too '\"" },
        { "''", "\"''\"" },
        // a backslash inside a double-quoted block is plain text
        { "\\", "\"\\\"" } };

    for (String[] testCase : cases) {
        String expected = testCase[0];
        String msPattern = testCase[1];
        assertEquals(expected, new Format(Source.EMPTY, dateTime, l(msPattern), zoneId).makePipe().asProcessor().process(null));
    }
}
/**
 * Checks that characters with no meaning in MS patterns pass through FORMAT unchanged,
 * while their meaningful lower-case counterparts are still interpreted.
 */
public void testAllowedCharactersIn() {
    ZoneId zoneId = ZoneId.of("Etc/GMT-10");
    Literal dateTime = l(dateTime(2019, 9, 3, 18, 10, 37, 123456789));

    // each row is { expected output, MS pattern }
    String[][] cases = {
        { "DGTYZ", "DGTYZ" },
        // upper-case letters stay as text, the lower-case ones are formatted
        { "DGTYZ 4ADAM2019+10", "DGTYZ dgtyz" },
        { "abceijklnopqruwxABCDEGIJLNOPQRSTUVWXYZ", "abceijklnopqruwxABCDEGIJLNOPQRSTUVWXYZ" },
        { ";.,?{}[]()!@#$%^&*", ";.,?{}[]()!@#$%^&*" } };

    for (String[] testCase : cases) {
        String expected = testCase[0];
        String msPattern = testCase[1];
        assertEquals(expected, new Format(Source.EMPTY, dateTime, l(msPattern), zoneId).makePipe().asProcessor().process(null));
    }
}
/**
 * Checks the raw MS-to-Java pattern translation, without running the formatter.
 */
public void testMsToJavaPattern() {
    // each row is { expected Java pattern, MS pattern }
    String[][] cases = {
        { "", "" },
        { "dd/mm/yyyy hh:mm:ssss S S G h H v a y X", "dd/mm/yyyy hh:mm:ssss f F g h H K t y z" },
        // characters without a meaning in MS become one quoted literal
        { "'abceijklnopqruwxABCDEGIJLNOPQRSTUVWXYZ'", "abceijklnopqruwxABCDEGIJLNOPQRSTUVWXYZ" },
        // translated fragments
        { "a", "t" },
        { "a", "tt" },
        { "eee", "ddd" },
        { "eeee", "dddd" },
        { "vGSSX", "KgfFz" },
        // every MS quoting style ends up as Java single-quote quoting,
        // and adjacent quoted pieces are merged into one block
        { "'foo'", "\"foo\"" },
        { "'foo'", "'foo'" },
        { "'foo'", "\\f\\o\\o" },
        { "'foo'", "\\f\"oo\"" },
        { "'foobar'", "'foo'\"bar\"" },
        // the space is a valid pattern character, so it splits the literal in two
        { "'abce' 'abce'", "abce abce" } };

    for (String[] testCase : cases) {
        String expected = testCase[0];
        String msPattern = testCase[1];
        assertEquals(expected, DateTimeFormatProcessor.Formatter.msToJavaPattern(msPattern));
    }
}
}