La forma correcta para acelerar un archivo de texto es utilizar SetTextBuf
. Y quizás agregando {$I-} .... {$I+}
en todo el acceso a archivos.
var
TmpBuf: array[word] of byte;
..
{$I-}
AssignFile(fout, AFile);
Append(fout);
SetTextBuf(fOut,TmpBuf);
for idx := 0 to ndx do
begin
MyPat := CPatientItem(FList.Objects[idx]);
if not Assigned(MyPat) then Continue;
MyPat.WriteItem(fout, AReplicat, AllFields);
end;
if ioresult<>0 then
ShowMessage('Error writing file');
CloseFile(fout);
{$I+}
end;
En todos los casos, la API de archivo de edad, no se va a utilizar hoy en día ...
{$I-} .... {$I+}
ha de añadirse también alrededor de todo sus rutinas sub añadir contenido al archivo de texto.
Tengo algún experimento sobre la creación de grandes archivos de texto y búfer. Escribí una clase dedicada en la unidad Open Source SynCommons, llamada TTextWriter
, que está orientada a UTF-8. Lo uso en particular para la producción JSON o LOG writing a la mayor velocidad posible. Evita la asignación de montón más temporal (por ejemplo, para la conversión de un valor entero), por lo que es incluso muy bueno en la escala de múltiples hilos. Algunos métodos de alto nivel están disponibles para formatear texto de una matriz abierta, como la función format()
, pero mucho más rápido.
Aquí es la interfaz de esta clase:
/// simple writer to a Stream, specialized for the TEXT format
// - use an internal buffer, faster than string+string
// - some dedicated methods is able to encode any data with JSON escape
TTextWriter = class
protected
B, BEnd: PUTF8Char;
fStream: TStream;
fInitialStreamPosition: integer;
fStreamIsOwned: boolean;
// internal temporary buffer
fTempBufSize: Integer;
fTempBuf: PUTF8Char;
// [0..4] for 'u0001' four-hex-digits template, [5..7] for one UTF-8 char
BufUnicode: array[0..7] of AnsiChar;
/// flush and go to next char
function FlushInc: PUTF8Char;
function GetLength: integer;
public
/// the data will be written to the specified Stream
// - aStream may be nil: in this case, it MUST be set before using any
// Add*() method
constructor Create(aStream: TStream; aBufSize: integer=1024);
/// the data will be written to an internal TMemoryStream
constructor CreateOwnedStream;
/// release fStream is is owned
destructor Destroy; override;
/// retrieve the data as a string
// - only works if the associated Stream Inherits from TMemoryStream: return
// '' if it is not the case
function Text: RawUTF8;
/// write pending data to the Stream
procedure Flush;
/// append one char to the buffer
procedure Add(c: AnsiChar); overload; {$ifdef HASINLINE}inline;{$endif}
/// append two chars to the buffer
procedure Add(c1,c2: AnsiChar); overload; {$ifdef HASINLINE}inline;{$endif}
/// append an Integer Value as a String
procedure Add(Value: Int64); overload;
/// append an Integer Value as a String
procedure Add(Value: integer); overload;
/// append a Currency from its Int64 in-memory representation
procedure AddCurr64(Value: PInt64); overload;
/// append a Currency from its Int64 in-memory representation
procedure AddCurr64(const Value: Int64); overload;
/// append a TTimeLog value, expanded as Iso-8601 encoded text
procedure AddTimeLog(Value: PInt64);
/// append a TDateTime value, expanded as Iso-8601 encoded text
procedure AddDateTime(Value: PDateTime); overload;
/// append a TDateTime value, expanded as Iso-8601 encoded text
procedure AddDateTime(const Value: TDateTime); overload;
/// append an Unsigned Integer Value as a String
procedure AddU(Value: cardinal);
/// append a floating-point Value as a String
// - double precision with max 3 decimals is default here, to avoid rounding
// problems
procedure Add(Value: double; decimals: integer=3); overload;
/// append strings or integers with a specified format
// - % = #37 indicates a string, integer, floating-point, or class parameter
// to be appended as text (e.g. class name)
// - $ = #36 indicates an integer to be written with 2 digits and a comma
// - £ = #163 indicates an integer to be written with 4 digits and a comma
// - µ = #181 indicates an integer to be written with 3 digits without any comma
// - ¤ = #164 indicates CR+LF chars
// - CR = #13 indicates CR+LF chars
// - § = #167 indicates to trim last comma
// - since some of this characters above are > #127, they are not UTF-8
// ready, so we expect the input format to be WinAnsi, i.e. mostly English
// text (with chars < #128) with some values to be inserted inside
// - if StringEscape is false (by default), the text won't be escaped before
// adding; but if set to true text will be JSON escaped at writing
procedure Add(Format: PWinAnsiChar; const Values: array of const;
Escape: TTextWriterKind=twNone); overload;
/// append CR+LF chars
procedure AddCR; {$ifdef HASINLINE}inline;{$endif}
/// write the same character multiple times
procedure AddChars(aChar: AnsiChar; aCount: integer);
/// append an Integer Value as a 2 digits String with comma
procedure Add2(Value: integer);
/// append the current date and time, in a log-friendly format
// - e.g. append '20110325 19241502 '
// - this method is very fast, and avoid most calculation or API calls
procedure AddCurrentLogTime;
/// append an Integer Value as a 4 digits String with comma
procedure Add4(Value: integer);
/// append an Integer Value as a 3 digits String without any added comma
procedure Add3(Value: integer);
/// append a line of text with CR+LF at the end
procedure AddLine(const Text: shortstring);
/// append a String
procedure AddString(const Text: RawUTF8); {$ifdef HASINLINE}inline;{$endif}
/// append a ShortString
procedure AddShort(const Text: ShortString); {$ifdef HASINLINE}inline;{$endif}
/// append a ShortString property name, as '"PropName":'
procedure AddPropName(const PropName: ShortString);
/// append an Instance name and pointer, as '"TObjectList(00425E68)"'+SepChar
// - Instance must be not nil
procedure AddInstanceName(Instance: TObject; SepChar: AnsiChar);
/// append an Instance name and pointer, as 'TObjectList(00425E68)'+SepChar
// - Instance must be not nil
procedure AddInstancePointer(Instance: TObject; SepChar: AnsiChar);
/// append an array of integers as CSV
procedure AddCSV(const Integers: array of Integer); overload;
/// append an array of doubles as CSV
procedure AddCSV(const Doubles: array of double; decimals: integer); overload;
/// append an array of RawUTF8 as CSV
procedure AddCSV(const Values: array of RawUTF8); overload;
/// write some data as hexa chars
procedure WrHex(P: PAnsiChar; Len: integer);
/// write some data Base64 encoded
// - if withMagic is TRUE, will write as '"\uFFF0base64encodedbinary"'
procedure WrBase64(P: PAnsiChar; Len: cardinal; withMagic: boolean);
/// write some #0 ended UTF-8 text, according to the specified format
procedure Add(P: PUTF8Char; Escape: TTextWriterKind); overload;
/// write some #0 ended UTF-8 text, according to the specified format
procedure Add(P: PUTF8Char; Len: PtrInt; Escape: TTextWriterKind); overload;
/// write some #0 ended Unicode text as UTF-8, according to the specified format
procedure AddW(P: PWord; Len: PtrInt; Escape: TTextWriterKind); overload;
/// append some chars to the buffer
// - if Len is 0, Len is calculated from zero-ended char
// - don't escapes chars according to the JSON RFC
procedure AddNoJSONEscape(P: Pointer; Len: integer=0);
/// append some binary data as hexadecimal text conversion
procedure AddBinToHex(P: Pointer; Len: integer);
/// fast conversion from binary data into hexa chars, ready to be displayed
// - using this function with Bin^ as an integer value will encode it
// in big-endian order (most-signignifican byte first): use it for display
// - up to 128 bytes may be converted
procedure AddBinToHexDisplay(Bin: pointer; BinBytes: integer);
/// add the pointer into hexa chars, ready to be displayed
procedure AddPointer(P: PtrUInt);
/// append some unicode chars to the buffer
// - WideCharCount is the unicode chars count, not the byte size
// - don't escapes chars according to the JSON RFC
// - will convert the Unicode chars into UTF-8
procedure AddNoJSONEscapeW(P: PWord; WideCharCount: integer);
/// append some UTF-8 encoded chars to the buffer
// - if Len is 0, Len is calculated from zero-ended char
// - escapes chars according to the JSON RFC
procedure AddJSONEscape(P: Pointer; Len: PtrInt=0); overload;
/// append some UTF-8 encoded chars to the buffer, from a generic string type
// - faster than AddJSONEscape(pointer(StringToUTF8(string))
// - if Len is 0, Len is calculated from zero-ended char
// - escapes chars according to the JSON RFC
procedure AddJSONEscapeString(const s: string); {$ifdef UNICODE}inline;{$endif}
/// append some Unicode encoded chars to the buffer
// - if Len is 0, Len is calculated from zero-ended widechar
// - escapes chars according to the JSON RFC
procedure AddJSONEscapeW(P: PWord; Len: PtrInt=0);
/// append an open array constant value to the buffer
// - "" will be added if necessary
// - escapes chars according to the JSON RFC
// - very fast (avoid most temporary storage)
procedure AddJSONEscape(const V: TVarRec); overload;
/// append a dynamic array content as UTF-8 encoded JSON array
// - expect a dynamic array TDynArray wrapper as incoming parameter
// - TIntegerDynArray, TInt64DynArray, TCardinalDynArray, TDoubleDynArray,
// TCurrencyDynArray, TWordDynArray and TByteDynArray will be written as
// numerical JSON values
// - TRawUTF8DynArray, TWinAnsiDynArray, TRawByteStringDynArray,
// TStringDynArray, TWideStringDynArray, TSynUnicodeDynArray, TTimeLogDynArray,
// and TDateTimeDynArray will be written as escaped UTF-8 JSON strings
// (and Iso-8601 textual encoding if necessary)
// - any other kind of dynamic array (including array of records) will be
// written as Base64 encoded binary stream, with a JSON_BASE64_MAGIC prefix
// (UTF-8 encoded \uFFF0 special code)
// - examples: '[1,2,3,4]' or '["\uFFF0base64encodedbinary"]'
procedure AddDynArrayJSON(const DynArray: TDynArray);
/// append some chars to the buffer in one line
// - P should be ended with a #0
// - will write #1..#31 chars as spaces (so content will stay on the same line)
procedure AddOnSameLine(P: PUTF8Char); overload;
/// append some chars to the buffer in one line
// - will write #0..#31 chars as spaces (so content will stay on the same line)
procedure AddOnSameLine(P: PUTF8Char; Len: PtrInt); overload;
/// append some wide chars to the buffer in one line
// - will write #0..#31 chars as spaces (so content will stay on the same line)
procedure AddOnSameLineW(P: PWord; Len: PtrInt);
/// serialize as JSON the given object
// - this default implementation will write null, or only write the
// class name and pointer if FullExpand is true - use TJSONSerializer.
// WriteObject method for full RTTI handling
// - default implementation will write TList/TCollection/TStrings/TRawUTF8List
// as appropriate array of class name/pointer (if FullExpand=true) or string
procedure WriteObject(Value: TObject; HumanReadable: boolean=false;
DontStoreDefault: boolean=true; FullExpand: boolean=false); virtual;
/// the last char appended is canceled
procedure CancelLastChar; {$ifdef HASINLINE}inline;{$endif}
/// the last char appended is canceled if it was a ','
procedure CancelLastComma; {$ifdef HASINLINE}inline;{$endif}
/// rewind the Stream to the position when Create() was called
procedure CancelAll;
/// count of add byte to the stream
property TextLength: integer read GetLength;
/// the internal TStream used for storage
property Stream: TStream read fStream write fStream;
end;
Como se puede ver, hay incluso una cierta serialización disponibles, y los métodos CancelLastComma/CancelLastChar
son muy útiles para producir JSON rápido o datos CSV desde un bucle.
Acerca de la velocidad y el tiempo, esta rutina es más rápida que mi acceso al disco, que es de alrededor de 100 MB/s. Creo que puede lograr alrededor de 500 MB/s al agregar datos en un TMemoryStream en lugar de un TFileStream.
¿Quieres que considerar el uso de corrientes en vez de presentar Pascal E/S? –
O TStringList con SaveTo File()? Pero antes que nada debes probar qué tan rápido es recorrer tus datos sin que el archivo escriba. –
¿Ya ejecutó un perfilador? Le dirá dónde pasa su tiempo su programa. –