From 23e961dd0d5aedd7e3acacda936e188b8980603c Mon Sep 17 00:00:00 2001 From: Calvin Loncaric Date: Fri, 14 Jun 2024 13:06:54 -0700 Subject: [PATCH 1/3] Add BufferedRandomAccessFile spec Signed-off-by: Calvin Loncaric --- README.md | 1 + manifest.json | 32 ++ .../braf/BufferedRandomAccessFile.cfg | 24 + .../braf/BufferedRandomAccessFile.tla | 447 ++++++++++++++++++ specifications/braf/README.md | 8 + 5 files changed, 512 insertions(+) create mode 100644 specifications/braf/BufferedRandomAccessFile.cfg create mode 100644 specifications/braf/BufferedRandomAccessFile.tla create mode 100644 specifications/braf/README.md diff --git a/README.md b/README.md index 540f4063..b51b420f 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ Here is a list of specs included in this repository, with links to the relevant | [TCP as defined in RFC 9293](specifications/tcp) | Markus Kuppe | | | | ✔ | | | [TLA⁺ Level Checking](specifications/LevelChecking) | Leslie Lamport | | | | | | | [Condition-Based Consensus](specifications/cbc_max) | Thanh Hai Tran, Igor Konnov, Josef Widder | | | | | | +| [Buffered Random Access File](specifications/braf) | Calvin Loncaric | | | | ✔ | | ## Examples Elsewhere Here is a list of specs stored in locations outside this repository, including submodules. diff --git a/manifest.json b/manifest.json index 512f52ad..807e447f 100644 --- a/manifest.json +++ b/manifest.json @@ -3415,6 +3415,38 @@ } ] }, + { + "path": "specifications/braf", + "title": "Buffered Random Access File", + "description": "Specification of a Java file caching layer", + "sources": ["https://github.com/tlaplus/tlaplus/tree/4613109641676389d97b8df209d6cf4d90d31c1c/tlatools/org.lamport.tlatools/src/tlc2/util/BufferedRandomAccessFile.tla"], + "authors": ["Calvin Loncaric"], + "tags": [], + "modules": [ + { + "path": "specifications/braf/BufferedRandomAccessFile.tla", + "communityDependencies": [], + "tlaLanguageVersion": 2, + "features": [], + "models": [ + { + "path": "specifications/braf/BufferedRandomAccessFile.cfg", + "runtime": "00:09:00", + "size": "small", + "mode": "exhaustive search", + "features": [ + "alias", + "symmetry" + ], + "result": "success", + "distinctStates": 3316, + "totalStates": 128581, + "stateDepth": 10 + } + ] + } + ] + }, { "path": "specifications/byihive", "title": "RFC 3506: Voucher Transaction System", diff --git a/specifications/braf/BufferedRandomAccessFile.cfg b/specifications/braf/BufferedRandomAccessFile.cfg new file mode 100644 index 00000000..1a717dcc --- /dev/null +++ b/specifications/braf/BufferedRandomAccessFile.cfg @@ -0,0 +1,24 @@ +SPECIFICATION Spec +CONSTANT Symbols = {A, B} +CONSTANT ArbitrarySymbol = ArbitrarySymbol +CONSTANT MaxOffset = 3 +CONSTANT BuffSz = 2 + +SYMMETRY Symmetry +ALIAS Alias + +INVARIANT TypeOK +INVARIANT Inv1 +\*INVARIANT Inv2 \* see note at definition of Inv2 +PROPERTY Inv2CanAlwaysBeRestored +INVARIANT Inv3 +INVARIANT Inv4 +INVARIANT Inv5 +PROPERTY Safety +PROPERTY FlushBufferCorrect +PROPERTY SeekCorrect +PROPERTY SeekEstablishesInv2 +PROPERTY Write1Correct +PROPERTY Read1Correct +PROPERTY WriteAtMostCorrect +PROPERTY ReadCorrect diff --git a/specifications/braf/BufferedRandomAccessFile.tla b/specifications/braf/BufferedRandomAccessFile.tla new file mode 100644 index 00000000..9f5b94a8 --- /dev/null +++ b/specifications/braf/BufferedRandomAccessFile.tla @@ -0,0 +1,447 @@ +\* Copyright (c) 2024, Oracle and/or its affiliates. + +----------------------- MODULE BufferedRandomAccessFile ----------------------- +\* This is a model-checkable specification for BufferedRandomAccessFile.java. +\* It covers the core fields as well as the seek, read, write, flush, and +\* setLength operations. +\* +\* There are three major correctess conditions: +\* +\* (1) the internal invariants V1-V5 should hold +\* (2) the behavior should refine a general RandomAccessFile +\* (3) each operation should refine its RandomAccessFile counterpart +\* +\* Readers will probably want to start with the general RandomAccessFile spec +\* before reading this one. + +EXTENDS Naturals, Sequences, TLC, Common + +CONSTANT BuffSz + +VARIABLES + \* in-memory variables (BufferedRandomAccessFile class fields) + dirty, + length, + curr, + lo, + buff, + diskPos, + + \* the underlying file + file_content, + file_pointer + +vars == << + dirty, length, curr, lo, buff, diskPos, + file_content, file_pointer>> + +TypeOK == + /\ dirty \in BOOLEAN + /\ length \in Offset + /\ curr \in Offset + /\ lo \in Offset + /\ buff \in Array(SymbolOrArbitrary, BuffSz) + /\ diskPos \in Offset + + /\ file_content \in ArrayOfAnyLength(SymbolOrArbitrary) + /\ ArrayLen(file_content) <= MaxOffset + /\ file_pointer \in Offset + +------------------------------------------------------------------------------- +\* Internal invariants (copied from comment in BufferedRandomAccessFile.java) + +RelevantBufferContent == + ArraySlice(buff, 0, Min(BuffSz, length - lo)) + +LogicalFileContent == \* denoted c(f) in .java file + IF ArrayLen(RelevantBufferContent) > 0 + THEN WriteToFile(file_content, lo, RelevantBufferContent) + ELSE file_content + +DiskF(i) == \* denoted disk(f)[i] in .java file + IF i >= 0 /\ i < ArrayLen(file_content) + THEN ArrayGet(file_content, i) + ELSE ArbitrarySymbol + +BufferedIndexes == lo .. (Min(lo + BuffSz, length) - 1) + +Inv1 == + \* /\ f.closed == closed(f) \* close() not described in this spec + \* /\ f.curr == curr(f) \* by definition; see `file_pointer <- curr` in refinement mapping below + /\ length = ArrayLen(LogicalFileContent) + /\ diskPos = file_pointer + +\* Inv2 is a bit special. Most methods restore it just before they return. It +\* is generally restored by calling `restoreInvariantsAfterIncreasingCurr()`. +\* But, that behavior is difficult to model in straight TLA+ because each +\* method may modify variables multiple times. So instead, this spec treats +\* Inv2 as a precondition for the methods and verifies that it is always +\* restored by calling `restoreInvariantsAfterIncreasingCurr()`. +\* See `Inv2CanAlwaysBeRestored` below. +Inv2 == + /\ lo <= curr + /\ curr < lo + BuffSz + +Inv3 == + \A i \in BufferedIndexes: + ArrayGet(LogicalFileContent, i) = ArrayGet(buff, i - lo) + +Inv4 == + \A i \in 0 .. (length - 1): + i \notin BufferedIndexes => + ArrayGet(LogicalFileContent, i) = DiskF(i) + +Inv5 == + (\E i \in BufferedIndexes: DiskF(i) /= ArrayGet(buff, i - lo)) => + dirty + +------------------------------------------------------------------------------- +\* Behavior + +Init == + /\ dirty = FALSE + /\ length = 0 + /\ curr = 0 + /\ lo = 0 + /\ buff \in Array({ArbitrarySymbol}, BuffSz) + /\ diskPos = 0 + /\ file_pointer = 0 + /\ file_content = EmptyArray + +FlushBuffer == + /\ dirty + /\ LET len == Min(length - lo, BuffSz) IN + /\ IF len > 0 + THEN LET diskPosA == lo IN \* super.seek(this.lo) + /\ file_content' = WriteToFile(file_content, diskPosA, ArraySlice(buff, 0, len)) + /\ file_pointer' = diskPosA + len + /\ diskPos' = lo + len + ELSE + UNCHANGED <> + /\ dirty' = FALSE + /\ UNCHANGED <> + +\* Helper for Seek (not a full action): +\* - reads lo' +\* - constrains diskPos', file_pointer', and buff' +FillBuffer == + LET diskPosA == lo' IN + /\ buff' = MkArray(BuffSz, [i \in 0..BuffSz |-> + LET fileOffset == diskPosA + i IN + IF fileOffset < ArrayLen(file_content) + THEN ArrayGet(file_content, fileOffset) + ELSE ArbitrarySymbol]) + /\ file_pointer' = Min(diskPosA + BuffSz, ArrayLen(file_content)) + /\ diskPos' = Min(diskPosA + BuffSz, ArrayLen(file_content)) + +Seek(pos) == + /\ curr' = pos + /\ IF pos < lo \/ pos >= (lo + BuffSz) THEN + /\ ~dirty \* call to FlushBuffer + /\ lo' = (pos \div BuffSz) * BuffSz + /\ FillBuffer + ELSE + UNCHANGED <> + /\ UNCHANGED <> + +SetLength(newLength) == + /\ file_content' = TruncateOrExtendFile(file_content, newLength) + /\ IF ArrayLen(file_content) > newLength /\ file_pointer > newLength + THEN file_pointer' = newLength + ELSE file_pointer' \in Offset + /\ length' = newLength + /\ diskPos' = file_pointer' + /\ IF curr > newLength + THEN curr' = newLength + ELSE UNCHANGED curr + \* In reality the buffer doesn't change---but some of its bytes might no + \* longer be relevant and have to be marked as arbitrary. + /\ buff' = MkArray(BuffSz, [i \in 0..(BuffSz-1) |-> + IF lo + i < newLength + THEN ArrayGet(buff, i) + ELSE ArbitrarySymbol]) + /\ UNCHANGED <> + +Read1(byte) == + /\ Inv2 + /\ curr < length + /\ byte = ArrayGet(buff, curr - lo) + /\ curr' = curr + 1 + /\ UNCHANGED <> + +Write1(byte) == + /\ curr + 1 <= MaxOffset \* bound model checking + /\ Inv2 + /\ buff' = ArraySet(buff, curr - lo, byte) + /\ curr' = curr + 1 + /\ dirty' = TRUE + /\ length' = Max(length, curr') + /\ UNCHANGED <> + +Read(data) == + LET numReadableWithoutSeeking == Min(lo + BuffSz, length) - curr IN + /\ Inv2 + /\ numReadableWithoutSeeking >= 0 + /\ LET + numToRead == Min(ArrayLen(data), numReadableWithoutSeeking) + buffOff == curr - lo + IN + /\ data = ArraySlice(buff, buffOff, buffOff + numToRead) + /\ curr' = curr + numToRead + /\ UNCHANGED <> + +\* The `write()` method is composed of repeated calls to `writeAtMost()`, so +\* verifying that the latter maintains all our invariants should be sufficient. +WriteAtMost(data) == + LET + numWriteableWithoutSeeking == Min(ArrayLen(data), lo + BuffSz - curr) + buffOff == curr - lo + IN + /\ Inv2 + /\ curr + numWriteableWithoutSeeking <= MaxOffset + /\ buff' = ArrayConcat(ArrayConcat( + ArraySlice(buff, 0, buffOff), + ArraySlice(data, 0, numWriteableWithoutSeeking)), + ArraySlice(buff, buffOff + numWriteableWithoutSeeking, ArrayLen(buff))) + /\ dirty' = TRUE + /\ curr' = curr + numWriteableWithoutSeeking + /\ length' = Max(length, curr') + /\ UNCHANGED <> + +Next == + \/ FlushBuffer + \/ \E offset \in Offset: + \/ Seek(offset) + \/ SetLength(offset) + \/ \E symbol \in SymbolOrArbitrary: + \/ Read1(symbol) + \/ Write1(symbol) + \/ \E len \in 1..MaxOffset: \E data \in Array(SymbolOrArbitrary, len): + \/ WriteAtMost(data) + \/ Read(data) + +Spec == Init /\ [][Next]_vars + +------------------------------------------------------------------------------- +\* Refinement of general RandomAccessFile + +RAF == INSTANCE RandomAccessFile WITH + file_content <- LogicalFileContent, + file_pointer <- curr + +Safety == RAF!Spec + +\* Ensure that the various actions behave according to their abstract specifications. +FlushBufferCorrect == [][FlushBuffer => UNCHANGED RAF!vars]_vars +SeekCorrect == [][\A offset \in Offset: Seek(offset) => RAF!Seek(offset)]_vars +SetLengthCorrect == [][\A offset \in Offset: SetLength(offset) => RAF!SetLength(offset)]_vars +SeekEstablishesInv2 == [][\A offset \in Offset: Seek(offset) => Inv2']_vars +Write1Correct == [][\A symbol \in SymbolOrArbitrary: Write1(symbol) => RAF!Write(SeqToArray(<>))]_vars +Read1Correct == [][\A symbol \in SymbolOrArbitrary: Read1(symbol) => RAF!Read(SeqToArray(<>))]_vars +WriteAtMostCorrect == [][\A len \in 1..MaxOffset: \A data \in Array(SymbolOrArbitrary, len): WriteAtMost(data) => \E written \in 1..len: RAF!Write(ArraySlice(data, 0, written))]_vars +ReadCorrect == [][\A len \in 1..MaxOffset: \A data \in Array(SymbolOrArbitrary, len): Read(data) => RAF!Read(data)]_vars + +\* Inv2 is a precondition for many actions; it should always be possible to +\* restore Inv2 by execuing `restoreInvariantsAfterIncreasingCurr()`. That +\* method calls `seeek(curr)`, which is composed of a FlushBuffer followed by a +\* Seek, or just a Seek. +\* +\* To ensure that `restoreInvariantsAfterIncreasingCurr()` works as expected +\* (without using the \cdot action composition operator), we'll verify a few +\* things: +\* - dirty => ENABLED FlushBuffer +\* - FlushBuffer => ~dirty' +\* - ~dirty => ENABLED Seek(curr) +\* - Seek(curr) => Inv2' +\* Together, those properties ensure that it is always possible to restore Inv2 +\* by taking a FlushBuffer action (if necessary) followed by a Seek(curr) +\* action. +FlushBufferPossibleWhenDirty == dirty => ENABLED FlushBuffer +FlushBufferMakesProgress == [][FlushBuffer => ~dirty']_vars +SeekCurrPossibleWhenNotDirty == ~dirty => ENABLED Seek(curr) +SeekCurrRestoresInv2 == [][Seek(curr) => Inv2']_vars +Inv2CanAlwaysBeRestored == + /\ []FlushBufferPossibleWhenDirty + /\ FlushBufferMakesProgress + /\ []SeekCurrPossibleWhenNotDirty + /\ SeekCurrRestoresInv2 + +------------------------------------------------------------------------------- +\* Model checking helper definitions + +Symmetry == Permutations(Symbols) + +Alias == [ + \* constants + BuffSz |-> BuffSz, + MaxOffset |-> MaxOffset, + + \* regular vars + dirty |-> dirty, + length |-> length, + curr |-> curr, + lo |-> lo, + buff |-> buff, + diskPos |-> diskPos, + file_content |-> file_content, + file_pointer |-> file_pointer, + + \* abstract vars + abstract_contents |-> LogicalFileContent] + +=============================================================================== + +--------------------------- MODULE RandomAccessFile --------------------------- +\* Specification of Java's RandomAccessFile class. +\* +\* A RandomAccessFile offers single-threaded access to some on-disk data +\* (`file_content`) and has an internal "pointer" or "cursor" (`file_pointer`). +\* Clients can move the pointer to an arbitrary position, or the client can +\* read or write data linearly from its current position, which simultaneously +\* advances the pointer. +\* +\* The core operations are: +\* - seek (to move the pointer) +\* - setLength (to resize the data) +\* - read (to copy bytes from disk to memory) +\* - write (to copy bytes from memory to disk) +\* +\* There are some cases where the general RandomAccessFile contract does not +\* define the data contents, for instance when extending the file using +\* setLength. In this spec, undefined bytes in the file are explicitly marked +\* with `ArbitrarySymbol`. While not entirely accurate, that choice simplifies +\* many definitions, since there is no need to nondeterministically choose +\* contents for the file. It also (incidentally) reduces state space explosion +\* during model checking. + +EXTENDS Naturals, Sequences, Common + +VARIABLES + file_content, + file_pointer + +vars == <> + +TypeOK == + /\ file_content \in ArrayOfAnyLength(SymbolOrArbitrary) + /\ ArrayLen(file_content) <= MaxOffset + /\ file_pointer \in Offset + +Init == + /\ file_content = EmptyArray + /\ file_pointer = 0 + +Seek(new_offset) == + /\ new_offset \in Offset + /\ file_pointer' = new_offset + /\ UNCHANGED <> + +SetLength(new_length) == + /\ file_content' = TruncateOrExtendFile(file_content, new_length) + + \* The pointer's behavior is very strange. Per RandomAccessFile docs [1]: + \* > If the present length of the file as returned by the length method is + \* > greater than the newLength argument then the file will be truncated. + \* > In this case, if the file offset as returned by the getFilePointer + \* > method is greater than newLength then after this method returns the + \* > offset will be equal to newLength. + \* + \* The docs say NOTHING else about the file pointer. So, we can assume + \* that there are no other formal restrictions on its behavior. + \* + \* [1]: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/io/RandomAccessFile.html#setLength(long) + /\ IF ArrayLen(file_content) > new_length /\ file_pointer > new_length + THEN file_pointer' = new_length + ELSE file_pointer' \in Offset + +Read(output) == + /\ output = ArraySlice(file_content, file_pointer, Min(file_pointer + ArrayLen(output), ArrayLen(file_content))) + /\ file_pointer' = file_pointer + ArrayLen(output) + /\ UNCHANGED <> + +Write(data) == + /\ file_pointer + ArrayLen(data) <= MaxOffset + /\ file_content' = WriteToFile(file_content, file_pointer, data) + /\ file_pointer' = file_pointer + ArrayLen(data) + +Next == + \/ \E offset \in Offset: + \/ Seek(offset) + \/ SetLength(offset) + \/ \E len \in 1..MaxOffset: \E data \in Array(SymbolOrArbitrary, len): + \/ Write(data) + \/ Read(data) + +Spec == Init /\ [][Next]_vars + +=============================================================================== + +-------------------------------- MODULE Common -------------------------------- +\* This module contains constants and definitions common to both +\* RandomAccessFile and BufferedRandomAccessFile. + +EXTENDS Naturals, Sequences + +CONSTANTS + Symbols, \* data stored in the file (in reality there are 256 symbols: bytes 0x00 to 0xFF) + ArbitrarySymbol, \* special token for an arbitrary symbol (to reduce the need for nondeterministic choice) + MaxOffset \* the highest possible offset (in reality this is 2^63 - 1) + +\* The set of legal offsets +Offset == 0..MaxOffset + +\* The set of things that can appear at an offset in a file +SymbolOrArbitrary == Symbols \union {ArbitrarySymbol} + +\* Minimum and maximum of two numbers +Min(a, b) == IF a <= b THEN a ELSE b +Max(a, b) == IF a <= b THEN b ELSE a + +\* Definitions for 0-indexed arrays (as opposed to TLA+ 1-indexed sequences). +\* A major goal of the BufferedRandomAccessFile spec is to prevent off-by-one +\* errors in the implementation; therefore it should use 0-indexed arrays like +\* Java. +\* +\* The definitions are deliberately crafted so that the usual sequence +\* operators do NOT work on them; this is to help avoid accidental mixing of +\* sequences and arrays. +ArrayOfAnyLength(T) == [elems: Seq(T)] +Array(T, len) == [elems: [1..len -> T]] +ConstArray(len, x) == [elems |-> [i \in 1..len |-> x]] +MkArray(len, f) == [elems |-> [i \in 1..len |-> f[i - 1]]] +EmptyArray == [elems |-> <<>>] +ArrayLen(a) == Len(a.elems) +ArrayToSeq(a) == a.elems +SeqToArray(seq) == [elems |-> seq] +ArrayGet(a, i) == a.elems[i+1] +ArraySet(a, i, x) == [a EXCEPT !.elems[i+1] = x] +ArraySlice(a, startInclusive, endExclusive) == [elems |-> SubSeq(a.elems, startInclusive + 1, endExclusive)] +ArrayConcat(a1, a2) == [elems |-> a1.elems \o a2.elems] + +\* General contract of the file `write()` call: extend the file with +\* ArbitrarySymbols if necessary, then overlay some `data_to_write` at the +\* given offset. +WriteToFile(file, offset, data_to_write) == + LET + file_len == ArrayLen(file) + data_len == ArrayLen(data_to_write) + length == Max(file_len, offset + data_len) + IN + MkArray( + length, + [i \in 0..(length-1) |-> + CASE + i < offset -> IF i < file_len THEN ArrayGet(file, i) ELSE ArbitrarySymbol + [] + i >= offset /\ i < offset + data_len -> ArrayGet(data_to_write, i - offset) + [] + i >= offset + data_len -> ArrayGet(file, i)]) + +\* General contract of the file `setLength()` call: truncate the file or fill +\* it with ArbitrarySymbol to reach the desired length. +TruncateOrExtendFile(file, new_length) == + IF new_length > ArrayLen(file) + THEN ArrayConcat(file, ConstArray(new_length - ArrayLen(file), ArbitrarySymbol)) + ELSE ArraySlice(file, 0, new_length) + +=============================================================================== diff --git a/specifications/braf/README.md b/specifications/braf/README.md new file mode 100644 index 00000000..9144f208 --- /dev/null +++ b/specifications/braf/README.md @@ -0,0 +1,8 @@ +BufferedRandomAccessFile.tla +---------------------------- + +Specification of +[`BufferedRandomAccessFile.java`](https://github.com/tlaplus/tlaplus/blob/4613109641676389d97b8df209d6cf4d90d31c1c/tlatools/org.lamport.tlatools/src/tlc2/util/BufferedRandomAccessFile.java), +a caching layer that TLC uses to speed up interaction with the filesystem. + +Copied from [the TLA⁺ source code](https://github.com/tlaplus/tlaplus/tree/4613109641676389d97b8df209d6cf4d90d31c1c/tlatools/org.lamport.tlatools/src/tlc2/util/BufferedRandomAccessFile.tla) on 2024/6/14. From f3410da001f203509bd87354117580fbe71a29b4 Mon Sep 17 00:00:00 2001 From: Calvin Loncaric Date: Fri, 14 Jun 2024 13:20:28 -0700 Subject: [PATCH 2/3] Add liveness tag to BRAF manifest entry This tag isn't quite correct; the spec checks _action_ properties, not full liveness properties. But it is true that the spec uses the `[]` operator and the config file uses the `PROPERTY` keyword. Signed-off-by: Calvin Loncaric --- manifest.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/manifest.json b/manifest.json index 807e447f..946a2fa9 100644 --- a/manifest.json +++ b/manifest.json @@ -3436,7 +3436,8 @@ "mode": "exhaustive search", "features": [ "alias", - "symmetry" + "symmetry", + "liveness" ], "result": "success", "distinctStates": 3316, From 4fb4032a1031229856e470d1e3611d3713d307c7 Mon Sep 17 00:00:00 2001 From: Calvin Loncaric Date: Thu, 20 Jun 2024 14:17:44 -0700 Subject: [PATCH 3/3] Fix expected runtime (format is HH:MM:SS) Signed-off-by: Calvin Loncaric --- manifest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifest.json b/manifest.json index 946a2fa9..7fe59c74 100644 --- a/manifest.json +++ b/manifest.json @@ -3431,7 +3431,7 @@ "models": [ { "path": "specifications/braf/BufferedRandomAccessFile.cfg", - "runtime": "00:09:00", + "runtime": "00:00:09", "size": "small", "mode": "exhaustive search", "features": [