com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mssql-jdbc Show documentation
Microsoft JDBC Driver for SQL Server.
//---------------------------------------------------------------------------------------------------------------------------------
// File: SQLServerBulkCSVFileRecord.java
//
//
// Microsoft JDBC Driver for SQL Server
// Copyright(c) Microsoft Corporation
// All rights reserved.
// MIT License
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"),
// to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and / or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions :
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//---------------------------------------------------------------------------------------------------------------------------------
package com.microsoft.sqlserver.jdbc;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.MessageFormat;
import java.time.*;
import java.time.format.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* A simple implementation of the ISQLServerBulkRecord interface that can be used to read in the
* basic Java data types from a delimited file where each line represents a row of data.
*/
public class SQLServerBulkCSVFileRecord implements ISQLServerBulkRecord , java.lang.AutoCloseable
{
/*
* Class to represent the column metadata
*/
private class ColumnMetadata{
String columnName;
int columnType;
int precision;
int scale;
DateTimeFormatter dateTimeFormatter = null;
ColumnMetadata(String name, int type, int precision, int scale, DateTimeFormatter dateTimeFormatter)
{
columnName = name;
columnType = type;
this.precision = precision;
this.scale = scale;
this.dateTimeFormatter = dateTimeFormatter;
}
}
/*
* Resources associated with reading in the file
*/
private BufferedReader fileReader;
private InputStreamReader sr;
private FileInputStream fis;
/*
* Metadata to represent the columns in the file. Each column should be mapped to its corresponding
* position within the file (from position 1 and onwards)
*/
private Map columnMetadata;
/*
* Current line of data to parse.
*/
private String currentLine = null;
/*
* Delimiter to parse lines with.
*/
private final String delimiter;
/*
* Contains all the column names if firstLineIsColumnNames is true
*/
private String[] columnNames = null;
/*
* Contains the format that java.sql.Types.TIMESTAMP_WITH_TIMEZONE data should be read in as.
*/
private DateTimeFormatter dateTimeFormatter = null;
/*
* Contains the format that java.sql.Types.TIME_WITH_TIMEZONE data should be read in as.
*/
private DateTimeFormatter timeFormatter = null;
/*
* Class name for logging.
*/
private static final String loggerClassName = "com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord";
/*
* Logger
*/
private static final java.util.logging.Logger loggerExternal =
java.util.logging.Logger.getLogger(loggerClassName);
/**
* Creates a simple reader to parse data from a delimited file with the given encoding.
*
* @param fileToParse File to parse data from
* @param encoding Charset encoding to use for reading the file, or NULL for the default encoding.
* @param delimiter Delimiter to used to separate each column
* @param firstLineIsColumnNames True if the first line of the file should be parsed as column names; false otherwise
* @throws SQLServerException If the arguments are invalid, there are any errors in reading the file, or the file is empty
*/
public SQLServerBulkCSVFileRecord(String fileToParse, String encoding, String delimiter, boolean firstLineIsColumnNames) throws SQLServerException
{
loggerExternal.entering(loggerClassName, "SQLServerBulkCSVFileRecord",
new Object[] { fileToParse, encoding, delimiter,
firstLineIsColumnNames });
if (null == fileToParse) {
throwInvalidArgument("fileToParse");
} else if (null == delimiter) {
throwInvalidArgument("delimiter");
}
this.delimiter = delimiter;
try
{
//Create the file reader
fis = new FileInputStream(fileToParse);
if (null == encoding || 0 == encoding.length())
{
sr = new InputStreamReader(fis);
}
else
{
sr = new InputStreamReader(fis, encoding);
}
fileReader = new BufferedReader(sr);
if (firstLineIsColumnNames)
{
currentLine = fileReader.readLine();
if(null != currentLine)
{
columnNames = currentLine.split(delimiter, -1);
}
}
}
catch(UnsupportedEncodingException unsupportedEncoding)
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_unsupportedEncoding"));
throw new SQLServerException(form.format(new Object[] {encoding}), null, 0, null);
}
catch (Exception e)
{
throw new SQLServerException(null , e.getMessage() , null , 0 , false);
}
columnMetadata = new HashMap();
loggerExternal.exiting(loggerClassName, "SQLServerBulkCSVFileRecord");
}
/**
* Creates a simple reader to parse data from a CSV file with the given encoding.
*
* @param fileToParse File to parse data from
* @param encoding Charset encoding to use for reading the file.
* @param firstLineIsColumnNames True if the first line of the file should be parsed as column names; false otherwise
* @throws SQLServerException If the arguments are invalid, there are any errors in reading the file, or the file is empty
*/
public SQLServerBulkCSVFileRecord(String fileToParse, String encoding, boolean firstLineIsColumnNames) throws SQLServerException
{
this(fileToParse,encoding,",", firstLineIsColumnNames);
}
/**
* Creates a simple reader to parse data from a CSV file with the default encoding.
*
* @param fileToParse File to parse data from
* @param firstLineIsColumnNames True if the first line of the file should be parsed as column names; false otherwise
* @throws SQLServerException If the arguments are invalid, there are any errors in reading the file, or the file is empty
*/
public SQLServerBulkCSVFileRecord(String fileToParse, boolean firstLineIsColumnNames) throws SQLServerException
{
this(fileToParse,null,",", firstLineIsColumnNames);
}
public void addColumnMetadata(int positionInFile, String name, int jdbcType, int precision, int scale, DateTimeFormatter dateTimeFormatter) throws SQLServerException
{
addColumnMetadataInternal(positionInFile, name, jdbcType, precision, scale, dateTimeFormatter);
}
public void addColumnMetadata(int positionInFile, String name, int jdbcType, int precision, int scale) throws SQLServerException
{
addColumnMetadataInternal(positionInFile, name, jdbcType, precision, scale, null);
}
/**
* Adds metadata for the given column in the file.
*
* @param positionInFile Indicates which column the metadata is for. Columns start at 1.
* @param name Name for the column (optional if only using column ordinal in a mapping for SQLServerBulkCopy operation)
* @param jdbcType JDBC data type of the column
* @param precision Precision for the column (ignored for the appropriate data types)
* @param scale Scale for the column (ignored for the appropriate data types)
* @param dateTimeFormatter format to parse data that is sent
* @throws SQLServerException
*/
void addColumnMetadataInternal(int positionInFile, String name, int jdbcType, int precision, int scale, DateTimeFormatter dateTimeFormatter) throws SQLServerException
{
loggerExternal.entering(loggerClassName, "addColumnMetadata", new Object[] {
positionInFile, name, jdbcType, precision, scale });
String colName = "";
if(0 >= positionInFile)
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_invalidColumnOrdinal"));
Object[] msgArgs = {positionInFile};
throw new SQLServerException(form.format(msgArgs), SQLState.COL_NOT_FOUND, DriverError.NOT_SET, null);
}
if (null != name)
colName = name.trim();
else if((columnNames!=null) && (columnNames.length >= positionInFile))
colName = columnNames[positionInFile-1];
if((columnNames!=null) && (positionInFile >columnNames.length))
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_invalidColumn"));
Object[] msgArgs = {positionInFile};
throw new SQLServerException(form.format(msgArgs), SQLState.COL_NOT_FOUND, DriverError.NOT_SET, null);
}
checkDuplicateColumnName(positionInFile,name);
switch (jdbcType)
{
/*
* SQL Server supports numerous string literal formats for temporal types, hence sending them as varchar
* with approximate precision(length) needed to send supported string literals.
* string literal formats supported by temporal types are available in MSDN page on data types.
*/
case java.sql.Types.DATE:
case java.sql.Types.TIME:
case java.sql.Types.TIMESTAMP:
case microsoft.sql.Types.DATETIMEOFFSET:
// The precision is just a number long enough to hold all types of temporal data, doesn't need to be exact precision.
columnMetadata.put(positionInFile, new ColumnMetadata(colName, jdbcType, 50, scale, dateTimeFormatter));
break;
// Redirect SQLXML as LONGNVARCHAR
// SQLXML is not valid type in TDS
case java.sql.Types.SQLXML:
columnMetadata.put(positionInFile, new ColumnMetadata(colName, java.sql.Types.LONGNVARCHAR, precision, scale, dateTimeFormatter));
break;
// Redirecting Float as Double based on data type mapping
// https://msdn.microsoft.com/en-us/library/ms378878%28v=sql.110%29.aspx
case java.sql.Types.FLOAT:
columnMetadata.put(positionInFile, new ColumnMetadata(colName, java.sql.Types.DOUBLE, precision, scale, dateTimeFormatter));
break;
// redirecting BOOLEAN as BIT
case java.sql.Types.BOOLEAN:
columnMetadata.put(positionInFile, new ColumnMetadata(colName, java.sql.Types.BIT, precision, scale, dateTimeFormatter));
break;
default:
columnMetadata.put(positionInFile, new ColumnMetadata(colName, jdbcType, precision, scale, dateTimeFormatter));
}
loggerExternal.exiting(loggerClassName, "addColumnMetadata");
}
/**
* Set the format for reading in dates from the file.
* @param dateTimeFormat format to parse data sent as java.sql.Types.TIMESTAMP_WITH_TIMEZONE
*/
public void setTimestampWithTimezoneFormat(String dateTimeFormat)
{
DriverJDBCVersion.checkSupportsJDBC42();
loggerExternal.entering(loggerClassName, "setTimestampWithTimezoneFormat",dateTimeFormat);
this.dateTimeFormatter = DateTimeFormatter.ofPattern(dateTimeFormat);
loggerExternal.exiting(loggerClassName, "setTimestampWithTimezoneFormat");
}
/**
* Set the format for reading in dates from the file.
* @param dateTimeFormat format to parse data sent as java.sql.Types.TIMESTAMP_WITH_TIMEZONE
*/
public void setTimestampWithTimezoneFormat(DateTimeFormatter dateTimeFormatter)
{
loggerExternal.entering(loggerClassName,
"setTimestampWithTimezoneFormat",
new Object[] { dateTimeFormatter });
this.dateTimeFormatter = dateTimeFormatter;
loggerExternal.exiting(loggerClassName, "setTimestampWithTimezoneFormat");
}
/**
* Set the format for reading in dates from the file.
* @param dateTimeFormat format to parse data sent as java.sql.Types.TIME_WITH_TIMEZONE
*/
public void setTimeWithTimezoneFormat(String timeFormat)
{
DriverJDBCVersion.checkSupportsJDBC42();
loggerExternal.entering(loggerClassName, "setTimeWithTimezoneFormat",timeFormat);
this.timeFormatter = DateTimeFormatter.ofPattern(timeFormat);
loggerExternal.exiting(loggerClassName, "setTimeWithTimezoneFormat");
}
/**
* Set the format for reading in dates from the file.
* @param dateTimeFormat format to parse data sent as java.sql.Types.TIME_WITH_TIMEZONE
*/
public void setTimeWithTimezoneFormat(DateTimeFormatter dateTimeFormatter)
{
loggerExternal.entering(loggerClassName, "setTimeWithTimezoneFormat",
new Object[] { dateTimeFormatter });
this.timeFormatter = dateTimeFormatter;
loggerExternal.exiting(loggerClassName, "setTimeWithTimezoneFormat");
}
/**
* Releases any resources associated with the file reader.
*/
public void close() throws SQLServerException
{
loggerExternal.entering(loggerClassName, "close");
// Ignore errors since we are only cleaning up here
if (fileReader != null) try { fileReader.close(); } catch(Exception e) {}
if (sr != null) try { sr.close(); } catch(Exception e) {}
if (fis != null) try { fis.close(); } catch(Exception e) {}
loggerExternal.exiting(loggerClassName, "close");
}
public DateTimeFormatter getColumnDateTimeFormatter(int column) {
return columnMetadata.get(column).dateTimeFormatter;
}
@Override
public Set getColumnOrdinals() {
return columnMetadata.keySet();
}
@Override
public String getColumnName(int column) {
return columnMetadata.get(column).columnName;
}
@Override
public int getColumnType(int column) {
return columnMetadata.get(column).columnType;
}
@Override
public int getPrecision(int column) {
return columnMetadata.get(column).precision;
}
@Override
public int getScale(int column) {
return columnMetadata.get(column).scale;
}
@Override
public boolean isAutoIncrement(int column) {
return false;
}
@Override
public Object[] getRowData() throws SQLServerException
{
if (null == currentLine)
return null;
else
{
// Binary data may be corrupted
// The limit in split() function should be a negative value, otherwise trailing empty strings are discarded.
// Empty string is returned if there is no value.
String[] data = currentLine.split(delimiter, -1);
// Cannot go directly from String[] to Object[] and expect it to act as an array.
Object[] dataRow = new Object[data.length];
Iterator> it = columnMetadata.entrySet().iterator();
while (it.hasNext())
{
Entry pair = it.next();
ColumnMetadata cm = pair.getValue();
// Reading a column not available in csv
// positionInFile > number of columns retrieved after split
if (data.length < pair.getKey()-1)
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_invalidColumn"));
Object[] msgArgs = {pair.getKey()};
throw new SQLServerException(form.format(msgArgs), SQLState.COL_NOT_FOUND, DriverError.NOT_SET, null);
}
// Source header has more columns than current line read
if (columnNames!=null && (columnNames.length > data.length))
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_BulkCSVDataSchemaMismatch"));
Object[] msgArgs = {};
throw new SQLServerException(form.format(msgArgs), SQLState.COL_NOT_FOUND, DriverError.NOT_SET, null);
}
try
{
if (0 == data[pair.getKey()-1].length())
{
dataRow[pair.getKey()-1] = null;
continue;
}
switch (cm.columnType)
{
/*
* Both BCP and BULK INSERT considers double quotes as part of the data and throws error if any data
* (say "10") is to be inserted into an numeric column. Our implementation does the same.
*/
case java.sql.Types.INTEGER:
{
// Formatter to remove the decimal part as SQL Server floors the decimal in integer types
DecimalFormat decimalFormatter = new DecimalFormat("#");
String formatedfInput = decimalFormatter.format(Double.parseDouble(data[pair.getKey()-1]));
dataRow[pair.getKey()-1] = Integer.valueOf(formatedfInput);
break;
}
case java.sql.Types.TINYINT:
case java.sql.Types.SMALLINT:
{
// Formatter to remove the decimal part as SQL Server floors the decimal in integer types
DecimalFormat decimalFormatter = new DecimalFormat("#");
String formatedfInput = decimalFormatter.format(Double.parseDouble(data[pair.getKey()-1]));
dataRow[pair.getKey()-1] = Short.valueOf(formatedfInput);
break;
}
case java.sql.Types.BIGINT:
{
BigDecimal bd = new BigDecimal(data[pair.getKey()-1].trim());
try
{
dataRow[pair.getKey()-1] = bd.setScale(0,BigDecimal.ROUND_DOWN).longValueExact();
}
catch(ArithmeticException ex)
{
String value = "'" + data[pair.getKey()-1] + "'";
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_errorConvertingValue"));
throw new SQLServerException(form.format(new Object[] {value, JDBCType.of(cm.columnType)}), null, 0, null);
}
break;
}
case java.sql.Types.DECIMAL:
case java.sql.Types.NUMERIC:
{
BigDecimal bd = new BigDecimal(data[pair.getKey()-1].trim());
dataRow[pair.getKey()-1] = bd.setScale(cm.scale, RoundingMode.HALF_UP);
break;
}
case java.sql.Types.BIT:
{
// "true" => 1, "false" => 0
// Any non-zero value (integer/double) => 1, 0/0.0 => 0
try
{
dataRow[pair.getKey()-1] =
(0 == Double.parseDouble(data[pair.getKey()-1])) ?
Boolean.FALSE : Boolean.TRUE;
}
catch(NumberFormatException e)
{
dataRow[pair.getKey()-1] = Boolean.parseBoolean(data[pair.getKey()-1]);
}
break;
}
case java.sql.Types.REAL:
{
dataRow[pair.getKey()-1] = Float.parseFloat(data[pair.getKey()-1]);
break;
}
case java.sql.Types.DOUBLE:
{
dataRow[pair.getKey()-1] = Double.parseDouble(data[pair.getKey()-1]);
break;
}
case java.sql.Types.BINARY:
case java.sql.Types.VARBINARY:
case java.sql.Types.LONGVARBINARY:
case java.sql.Types.BLOB:
{
/*
* For binary data, the value in file may or may not have the '0x' prefix.
* We will try to match our implementation with 'BULK INSERT' except that we will
* allow 0x prefix whereas 'BULK INSERT' command does not allow 0x prefix.
* A BULK INSERT example:
* A sample csv file containing data for 2 binary columns and 1 row:
* 61,62
* Table definition: create table t1(c1 varbinary(10), c2 varbinary(10))
* BULK INSERT command: bulk insert t1 from 'C:\in.csv' with(DATAFILETYPE='char',firstrow=1,FIELDTERMINATOR=',')
* select * from t1 shows 1 row with columns: 0x61, 0x62
*/
// Strip off 0x if present.
String binData = data[pair.getKey()-1].trim();
if (binData.startsWith("0x") ||
binData.startsWith("0X"))
{
dataRow[pair.getKey()-1] = binData.substring(2);
}
else
{
dataRow[pair.getKey()-1] = binData;
}
break;
}
case 2013: // java.sql.Types.TIME_WITH_TIMEZONE
{
DriverJDBCVersion.checkSupportsJDBC42();
OffsetTime offsetTimeValue = null;
// The per-column DateTimeFormatter gets priority.
if (null != cm.dateTimeFormatter)
offsetTimeValue= OffsetTime.parse(data[pair.getKey()-1], cm.dateTimeFormatter);
else if(timeFormatter != null)
offsetTimeValue= OffsetTime.parse(data[pair.getKey()-1], timeFormatter);
else
offsetTimeValue= OffsetTime.parse(data[pair.getKey()-1]);
dataRow[pair.getKey()-1] = offsetTimeValue;
break;
}
case 2014: // java.sql.Types.TIMESTAMP_WITH_TIMEZONE
{
DriverJDBCVersion.checkSupportsJDBC42();
OffsetDateTime offsetDateTimeValue = null;
// The per-column DateTimeFormatter gets priority.
if (null != cm.dateTimeFormatter)
offsetDateTimeValue= OffsetDateTime.parse(data[pair.getKey()-1], cm.dateTimeFormatter);
else if(dateTimeFormatter != null)
offsetDateTimeValue= OffsetDateTime.parse(data[pair.getKey()-1], dateTimeFormatter);
else
offsetDateTimeValue= OffsetDateTime.parse(data[pair.getKey()-1]);
dataRow[pair.getKey()-1] = offsetDateTimeValue;
break;
}
case java.sql.Types.NULL:
{
dataRow[pair.getKey()-1] = null;
break;
}
case java.sql.Types.DATE:
case java.sql.Types.CHAR:
case java.sql.Types.NCHAR:
case java.sql.Types.VARCHAR:
case java.sql.Types.NVARCHAR:
case java.sql.Types.LONGVARCHAR:
case java.sql.Types.LONGNVARCHAR:
case java.sql.Types.CLOB:
default:
{
// The string is copied as is.
/* Handling double quotes:
* Both BCP (without a format file) and BULK INSERT behaves the same way for double quotes.
* They treat double quotes as part of the data. For a CSV file as follows, data is inserted as is:
""abc""
"abc"
abc
a"b"c
a""b""c
* Excel on the other hand, shows data as follows. It strips off beginning and ending quotes, and sometimes quotes get messed up.
* When the same CSV is saved from Excel again, Excel adds additional quotes.
abc""
abc
abc
a"b"c
a""b""c
* In our implementation we will match the behavior with BCP and BULK INSERT.
* BCP command: bcp table1 in in.csv -c -t , -r 0x0A -S localhost -U sa -P
* BULK INSERT command: bulk insert table1 from 'in.csv' with (FIELDTERMINATOR=',')
*
* Handling delimiters in data:
* Excel allows comma in data when data is surrounded with quotes. For example,
* "Hello, world" is treated as one cell. BCP and BULK INSERT deos not allow field
* terminators in data:
* https://technet.microsoft.com/en-us/library/aa196735%28v=sql.80%29.aspx?f=255&MSPPError=-2147217396
*/
dataRow[pair.getKey()-1] = data[pair.getKey()-1];
break;
}
}
}
catch(IllegalArgumentException e)
{
String value = "'" + data[pair.getKey()-1] + "'";
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_errorConvertingValue"));
throw new SQLServerException(form.format(new Object[] {value, JDBCType.of(cm.columnType)}), null, 0, null);
}
catch(ArrayIndexOutOfBoundsException e)
{
throw new SQLServerException(SQLServerException.getErrString("R_BulkCSVDataSchemaMismatch"), null);
}
}
return dataRow;
}
}
@Override
public boolean next() throws SQLServerException {
try {
currentLine = fileReader.readLine();
} catch (IOException e) {
throw new SQLServerException(null , e.getMessage() , null , 0 , false);
}
return (null != currentLine);
}
/*
* Helper method to throw a SQLServerExeption with the invalidArgument message and given argument.
*/
private void throwInvalidArgument(String argument) throws SQLServerException
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_invalidArgument"));
Object[] msgArgs = {argument};
SQLServerException.makeFromDriverError(null, null, form.format(msgArgs), null, false);
}
/*
* Method to throw a SQLServerExeption for duplicate column names
*/
private void checkDuplicateColumnName(int positionInFile, String colName)
throws SQLServerException
{
if (null != colName && colName.trim().length() != 0)
{
for (Entry entry : columnMetadata.entrySet())
{
// duplicate check is not performed in case of same positionInFile value
if (null != entry && entry.getKey() != positionInFile)
{
if (null != entry.getValue() &&
colName.trim().equalsIgnoreCase(entry.getValue().columnName))
{
throw new SQLServerException(
SQLServerException.getErrString("R_BulkCSVDataDuplicateColumn"),
null);
}
}
}
}
}
}